diff options
author | John Koleszar <jkoleszar@google.com> | 2011-02-11 00:05:19 -0500 |
---|---|---|
committer | John Koleszar <jkoleszar@google.com> | 2011-02-11 00:05:19 -0500 |
commit | 64aebb6c7ad2983d422325babf42594bd3b6a45e (patch) | |
tree | bc2899bf5f91319888520f9fbddacf8c5292ffa2 /vp8 | |
parent | b25ebeabeba2b8988859452e719dfa86892a2d3d (diff) | |
parent | 6f53e59641fb75189d888271a1dc87f224f04a2d (diff) | |
download | libvpx-64aebb6c7ad2983d422325babf42594bd3b6a45e.tar libvpx-64aebb6c7ad2983d422325babf42594bd3b6a45e.tar.gz libvpx-64aebb6c7ad2983d422325babf42594bd3b6a45e.tar.bz2 libvpx-64aebb6c7ad2983d422325babf42594bd3b6a45e.zip |
Merge remote branch 'internal/upstream' into HEAD
Diffstat (limited to 'vp8')
-rw-r--r-- | vp8/common/arm/bilinearfilter_arm.c | 21 | ||||
-rw-r--r-- | vp8/common/arm/bilinearfilter_arm.h | 35 | ||||
-rw-r--r-- | vp8/encoder/arm/arm_csystemdependent.c | 8 | ||||
-rw-r--r-- | vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm | 147 | ||||
-rw-r--r-- | vp8/encoder/arm/variance_arm.c | 34 | ||||
-rw-r--r-- | vp8/encoder/arm/variance_arm.h | 17 | ||||
-rw-r--r-- | vp8/encoder/mcomp.c | 10 | ||||
-rw-r--r-- | vp8/encoder/mcomp.h | 4 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 2 | ||||
-rw-r--r-- | vp8/encoder/pickinter.c | 2 | ||||
-rw-r--r-- | vp8/encoder/rdopt.c | 4 | ||||
-rw-r--r-- | vp8/encoder/temporal_filter.c | 2 | ||||
-rw-r--r-- | vp8/vp8_common.mk | 1 | ||||
-rw-r--r-- | vp8/vp8cx_arm.mk | 4 |
14 files changed, 255 insertions, 36 deletions
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c index 961d142c9..6a46ef685 100644 --- a/vp8/common/arm/bilinearfilter_arm.c +++ b/vp8/common/arm/bilinearfilter_arm.c @@ -12,26 +12,7 @@ #include <math.h> #include "filter.h" #include "subpixel.h" - -extern void vp8_filter_block2d_bil_first_pass_armv6 -( - unsigned char *src_ptr, - unsigned short *dst_ptr, - unsigned int src_pitch, - unsigned int height, - unsigned int width, - const short *vp8_filter -); - -extern void vp8_filter_block2d_bil_second_pass_armv6 -( - unsigned short *src_ptr, - unsigned char *dst_ptr, - int dst_pitch, - unsigned int height, - unsigned int width, - const short *vp8_filter -); +#include "arm/bilinearfilter_arm.h" void vp8_filter_block2d_bil_armv6 ( diff --git a/vp8/common/arm/bilinearfilter_arm.h b/vp8/common/arm/bilinearfilter_arm.h new file mode 100644 index 000000000..b7155d3f0 --- /dev/null +++ b/vp8/common/arm/bilinearfilter_arm.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef BILINEARFILTER_ARM_H +#define BILINEARFILTER_ARM_H + +extern void vp8_filter_block2d_bil_first_pass_armv6 +( + const unsigned char *src_ptr, + unsigned short *dst_ptr, + unsigned int src_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter +); + +extern void vp8_filter_block2d_bil_second_pass_armv6 +( + const unsigned short *src_ptr, + unsigned char *dst_ptr, + int dst_pitch, + unsigned int height, + unsigned int width, + const short *vp8_filter +); + +#endif /* BILINEARFILTER_ARM_H */ diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c index a1f110260..6c17a7984 100644 --- a/vp8/encoder/arm/arm_csystemdependent.c +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -38,14 +38,14 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;*/ + cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;*/ + cpi->rtcd.variance.var16x16 = vp8_variance16x16_armv6; /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;*/ + cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/ + cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_armv6; /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ diff --git a/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm b/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm new file mode 100644 index 000000000..8d7258af7 --- /dev/null +++ b/vp8/encoder/arm/armv6/vp8_variance16x16_armv6.asm @@ -0,0 +1,147 @@ 
+; +; Copyright (c) 2011 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vp8_variance16x16_armv6| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; r0 unsigned char *src_ptr +; r1 int source_stride +; r2 unsigned char *ref_ptr +; r3 int recon_stride +; stack unsigned int *sse +|vp8_variance16x16_armv6| PROC + + stmfd sp!, {r4-r12, lr} + mov r12, #16 ; set loop counter to 16 (=block height) + mov r8, #0 ; initialize sum = 0 + mov r11, #0 ; initialize sse = 0 + +loop + ; 1st 4 pixels + ldr r4, [r0, #0x0] ; load 4 src pixels + ldr r5, [r2, #0x0] ; load 4 ref pixels + + mov lr, #0 ; constant zero + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + ; calculate total sum + adds r8, r8, r4 ; add positive differences to sum + subs r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 2nd 4 pixels + ldr r4, [r0, #0x4] ; load 4 src pixels + ldr r5, [r2, #0x4] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive 
difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 3rd 4 pixels + ldr r4, [r0, #0x8] ; load 4 src pixels + ldr r5, [r2, #0x8] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + + ; 4th 4 pixels + ldr r4, [r0, #0xc] ; load 4 src pixels + ldr r5, [r2, #0xc] ; load 4 ref pixels + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + usub8 r6, r4, r5 ; calculate difference + add r0, r0, r1 ; set src_ptr to next row + sel r7, r6, lr ; select bytes with positive difference + usub8 r9, r5, r4 ; calculate difference with reversed operands + add r2, r2, r3 ; set 
ref_ptr to next row + sel r6, r9, lr ; select bytes with negative difference + + ; calculate partial sums + usad8 r4, r7, lr ; calculate sum of positive differences + usad8 r5, r6, lr ; calculate sum of negative differences + orr r6, r6, r7 ; differences of all 4 pixels + + ; calculate total sum + add r8, r8, r4 ; add positive differences to sum + sub r8, r8, r5 ; subtract negative differences from sum + + ; calculate sse + uxtb16 r5, r6 ; byte (two pixels) to halfwords + uxtb16 r10, r6, ror #8 ; another two pixels to halfwords + smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1) + smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2) + + + subs r12, r12, #1 + + bne loop + + ; return stuff + ldr r6, [sp, #0x28] ; get address of sse + mul r0, r8, r8 ; sum * sum + str r11, [r6] ; store sse + sub r0, r11, r0, ASR #8 ; return (sse - ((sum * sum) >> 8)) + + ldmfd sp!, {r4-r12, pc} + + ENDP + + END diff --git a/vp8/encoder/arm/variance_arm.c b/vp8/encoder/arm/variance_arm.c index b40c0482f..9737bef46 100644 --- a/vp8/encoder/arm/variance_arm.c +++ b/vp8/encoder/arm/variance_arm.c @@ -10,6 +10,40 @@ #include "vpx_config.h" #include "variance.h" +#include "filter.h" +#include "arm/bilinearfilter_arm.h" + +#if HAVE_ARMV6 + +unsigned int vp8_sub_pixel_variance16x16_armv6 +( + const unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse +) +{ + unsigned short first_pass[36*16]; + unsigned char second_pass[20*16]; + const short *HFilter, *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass, + src_pixels_per_line, + 17, 16, HFilter); + vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass, + 16, 16, 16, VFilter); + + return vp8_variance16x16_armv6(second_pass, 16, dst_ptr, + dst_pixels_per_line, sse); +} + +#endif #if 
HAVE_ARMV7 diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index 3cbacfac3..06d72873e 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -12,6 +12,23 @@ #ifndef VARIANCE_ARM_H #define VARIANCE_ARM_H +#if HAVE_ARMV6 + +extern prototype_variance(vp8_variance16x16_armv6); +extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_armv6); + +#if !CONFIG_RUNTIME_CPU_DETECT + +#undef vp8_variance_subpixvar16x16 +#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_armv6 + +#undef vp8_variance_var16x16 +#define vp8_variance_var16x16 vp8_variance16x16_armv6 + +#endif /* !CONFIG_RUNTIME_CPU_DETECT */ + +#endif /* HAVE_ARMV6 */ + #if HAVE_ARMV7 extern prototype_sad(vp8_sad4x4_neon); extern prototype_sad(vp8_sad8x8_neon); diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index d9923fbe9..33aaa2ca9 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -779,15 +779,17 @@ int vp8_hex_search int *num00, const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2], - int *mvcost[2] + int *mvcost[2], + MV *center_mv ) { MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ; - MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ; + MV neighbors[8] = { { -1, -1}, {0, -1}, {1, -1}, { -1, 0}, {1, 0}, { -1, 1}, {0, 1}, {1, 1} } ; int i, j; unsigned char *src = (*(b->base_src) + b->src); int src_stride = b->src_stride; - int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc; + int rr = center_mv->row, rc = center_mv->col; + int br = ref_mv->row >> 3, bc = ref_mv->col >> 3, tr, tc; unsigned int besterr, thiserr = 0x7fffffff; int k = -1, tk; @@ -892,7 +894,7 @@ cal_neighbors: best_mv->row = br; best_mv->col = bc; - return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ; + return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + vp8_mv_err_cost(best_mv, center_mv, mvcost, error_per_bit) ; 
} #undef MVC #undef PRE diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index 7600f87fc..83f95c6e0 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -43,8 +43,8 @@ extern int vp8_hex_search int *num00, const vp8_variance_fn_ptr_t *vf, int *mvsadcost[2], - int *mvcost[2] - + int *mvcost[2], + MV *center_mv ); typedef int (fractional_mv_step_fp) diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 60ccc4e6e..6f964f5ef 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1187,6 +1187,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) #else sf->search_method = DIAMOND; #endif + sf->iterative_sub_pixel = 0; cpi->mode_check_freq[THR_V_PRED] = 4; cpi->mode_check_freq[THR_H_PRED] = 4; @@ -1238,7 +1239,6 @@ void vp8_set_speed_features(VP8_COMP *cpi) int total_skip; int min = 2000; - sf->iterative_sub_pixel = 0; if (cpi->oxcf.encode_breakout > 2000) min = cpi->oxcf.encode_breakout; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 287ca618e..0bfcd38a6 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -718,7 +718,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec if (cpi->sf.search_method == HEX) { - bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); + bestsme = vp8_hex_search(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; } diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 80af8fa74..818e6afe1 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -1214,7 +1214,7 @@ void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, if (cpi->sf.search_method == HEX) bestsme = vp8_hex_search(x, c, e, bsi->ref_mv, - &mode_mv[NEW4X4], step_param, 
sadpb, &num00, v_fn_ptr, x->mvsadcost, x->mvcost); + &mode_mv[NEW4X4], step_param, sadpb, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv); else { @@ -2156,7 +2156,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int if (cpi->sf.search_method == HEX) { - bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); + bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; } diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index 3c6d1a4d9..e4c3db1b8 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -204,7 +204,7 @@ static int vp8_temporal_filter_find_matching_mb_c step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], - mvsadcost, mvcost); + mvsadcost, mvcost, &best_ref_mv1); } else { diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index bf9fb513c..e98ee6b5a 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -121,6 +121,7 @@ VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c # common (c) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.c +VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.h VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/filter_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/loopfilter_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra_arm.c diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk index b23ac96ca..abc5dc800 100644 --- a/vp8/vp8cx_arm.mk +++ b/vp8/vp8cx_arm.mk @@ -17,9 +17,10 @@ VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c VP8_CX_SRCS-$(ARCH_ARM) += encoder/asm_enc_offsets.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c 
-VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/variance_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/quantize_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c +VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/variance_arm.c +VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/variance_arm.h VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE) += encoder/boolhuff.c @@ -33,6 +34,7 @@ VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_ar #File list for armv6 # encoder +VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/vp8_variance16x16_armv6$(ASM) VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/armv6/walsh_v6$(ASM) #File list for neon |