diff options
Diffstat (limited to 'vp8')
-rw-r--r-- | vp8/common/findnearmv.c | 39 | ||||
-rw-r--r-- | vp8/common/rtcd_defs.sh | 2 | ||||
-rw-r--r-- | vp8/encoder/variance_c.c | 77 | ||||
-rw-r--r-- | vp8/vp8cx.mk | 2 |
4 files changed, 100 insertions, 20 deletions
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c index 339458d58..3f1a451eb 100644 --- a/vp8/common/findnearmv.c +++ b/vp8/common/findnearmv.c @@ -168,6 +168,7 @@ vp8_prob *vp8_mv_ref_probs(VP8_COMMON *pc, } #if CONFIG_NEWBESTREFMV +#define SP(x) (((x) & 7) << 1) unsigned int vp8_sad3x16_c( const unsigned char *src_ptr, int src_stride, @@ -189,7 +190,6 @@ unsigned int vp8_sad16x3_c( * above and a number cols of pixels in the left to select the one with best * score to use as ref motion vector */ - void vp8_find_best_ref_mvs(MACROBLOCKD *xd, unsigned char *ref_y_buffer, int ref_y_stride, @@ -203,6 +203,7 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd, unsigned char *above_ref; unsigned char *left_ref; int sad; + int sse; int sad_scores[MAX_MV_REFS] = {0}; int_mv sorted_mvs[MAX_MV_REFS]; int zero_seen = FALSE; @@ -211,16 +212,16 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd, best_mv->as_int = nearest->as_int = near->as_int = 0; vpx_memset(sorted_mvs, 0, sizeof(sorted_mvs)); - above_src = xd->dst.y_buffer - xd->dst.y_stride * 3; - left_src = xd->dst.y_buffer - 3; - above_ref = ref_y_buffer - ref_y_stride * 3; - left_ref = ref_y_buffer - 3; + above_src = xd->dst.y_buffer - xd->dst.y_stride * 2; + left_src = xd->dst.y_buffer - 2; + above_ref = ref_y_buffer - ref_y_stride * 2; + left_ref = ref_y_buffer - 2; //for(i = 0; i < MAX_MV_REFS; ++i) { // Limit search to the predicted best 4 for(i = 0; i < 4; ++i) { int_mv this_mv; - int offset=0; + int offset = 0; int row_offset, col_offset; this_mv.as_int = mvlist[i].as_int; @@ -238,19 +239,23 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd, xd->mb_to_top_edge - LEFT_TOP_MARGIN + 16, xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); - row_offset = (this_mv.as_mv.row > 0) ? - ((this_mv.as_mv.row + 3) >> 3):((this_mv.as_mv.row + 4) >> 3); - col_offset = (this_mv.as_mv.col > 0) ? - ((this_mv.as_mv.col + 3) >> 3):((this_mv.as_mv.col + 4) >> 3); + row_offset = this_mv.as_mv.row >> 3; + col_offset = this_mv.as_mv.col >> 3; offset = ref_y_stride * row_offset + col_offset; sad = 0; - if (xd->up_available) - sad += vp8_sad16x3(above_src, xd->dst.y_stride, - above_ref + offset, ref_y_stride, INT_MAX); - if (xd->left_available) - sad += vp8_sad3x16(left_src, xd->dst.y_stride, - left_ref + offset, ref_y_stride, INT_MAX); + if (xd->up_available) { + vp8_sub_pixel_variance16x2_c(above_ref + offset, ref_y_stride, + SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), + above_src, xd->dst.y_stride, &sse); + sad += sse; + } + if (xd->left_available) { + vp8_sub_pixel_variance2x16_c(left_ref + offset, ref_y_stride, + SP(this_mv.as_mv.col), SP(this_mv.as_mv.row), + left_src, xd->dst.y_stride, &sse); + sad += sse; + } // Add the entry to our list and then resort the list on score. sad_scores[i] = sad; sorted_mvs[i].as_int = this_mv.as_int; @@ -280,7 +285,7 @@ void vp8_find_best_ref_mvs(MACROBLOCKD *xd, // be more than one 0,0 entry in the sorted list. // The best ref mv is always set to the first entry (which gave the best // results. The nearest is set to the first non zero vector if available and - // near to the second non zero vector if avaialable. + // near to the second non zero vector if available. // We do not use 0,0 as a nearest or near as 0,0 has its own mode. if ( sorted_mvs[0].as_int ) { nearest->as_int = sorted_mvs[0].as_int; diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh index 0feb441f0..5d388ac57 100644 --- a/vp8/common/rtcd_defs.sh +++ b/vp8/common/rtcd_defs.sh @@ -29,10 +29,10 @@ prototype void vp8_filter_block2d_16x16_8 "const unsigned char *src_ptr, const u # on the safe side, only enabled when compiled with 'gcc'. if [ "$CONFIG_GCC" = "yes" ]; then specialize vp8_filter_block2d_4x4_8 sse4_1 sse2 +fi specialize vp8_filter_block2d_8x4_8 ssse3 #sse4_1 sse2 specialize vp8_filter_block2d_8x8_8 ssse3 #sse4_1 sse2 specialize vp8_filter_block2d_16x16_8 ssse3 #sse4_1 sse2 -fi # diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c index cbe2a51d6..a66c80b80 100644 --- a/vp8/encoder/variance_c.c +++ b/vp8/encoder/variance_c.c @@ -508,3 +508,80 @@ unsigned int vp8_sub_pixel_variance8x16_c return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } +#if CONFIG_NEWBESTREFMV +unsigned int vp8_variance2x16_c( + const unsigned char *src_ptr, + const int source_stride, + const unsigned char *ref_ptr, + const int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 2, 16, &var, &avg); + *sse = var; + return (var - ((avg * avg) >> 5)); +} + +unsigned int vp8_variance16x2_c( + const unsigned char *src_ptr, + const int source_stride, + const unsigned char *ref_ptr, + const int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 2, &var, &avg); + *sse = var; + return (var - ((avg * avg) >> 5)); +} + +unsigned int vp8_sub_pixel_variance16x2_c +( + const unsigned char *src_ptr, + const int src_pixels_per_line, + const int xoffset, + const int yoffset, + const unsigned char *dst_ptr, + const int dst_pixels_per_line, + unsigned int *sse +) { + unsigned short FData3[16 * 3]; // Temp data bufffer used in filtering + unsigned char temp2[20 * 16]; + const short *HFilter, *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + var_filter_block2d_bil_first_pass(src_ptr, FData3, + src_pixels_per_line, 1, 3, 16, HFilter); + var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 2, 16, VFilter); + + return vp8_variance16x2_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp8_sub_pixel_variance2x16_c +( + const unsigned char *src_ptr, + const int src_pixels_per_line, + const int xoffset, + const int yoffset, + const unsigned char *dst_ptr, + const int dst_pixels_per_line, + unsigned int *sse +) { + unsigned short FData3[2 * 17]; // Temp data bufffer used in filtering + unsigned char temp2[2 * 16]; + const short *HFilter, *VFilter; + + HFilter = vp8_bilinear_filters[xoffset]; + VFilter = vp8_bilinear_filters[yoffset]; + + var_filter_block2d_bil_first_pass(src_ptr, FData3, + src_pixels_per_line, 1, 17, 2, HFilter); + var_filter_block2d_bil_second_pass(FData3, temp2, 2, 2, 16, 2, VFilter); + + return vp8_variance2x16_c(temp2, 2, dst_ptr, dst_pixels_per_line, sse); +} +#endif diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index bbafcdb05..ef6da7cab 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -46,7 +46,6 @@ VP8_CX_SRCS-yes += encoder/generic/csystemdependent.c VP8_CX_SRCS-yes += encoder/block.h VP8_CX_SRCS-yes += encoder/boolhuff.h VP8_CX_SRCS-yes += encoder/bitstream.h -VP8_CX_SRCS-yes += encoder/dct.h VP8_CX_SRCS-yes += encoder/encodeintra.h VP8_CX_SRCS-yes += encoder/encodemb.h VP8_CX_SRCS-yes += encoder/encodemv.h @@ -89,7 +88,6 @@ VP8_CX_SRCS-yes += encoder/mbgraph.c VP8_CX_SRCS-yes += encoder/mbgraph.h -VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h |