diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_block.h | 10 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 172 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 292 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 17 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.c | 120 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.h | 3 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_variance_sse2.c | 55 |
10 files changed, 217 insertions, 460 deletions
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index fcf2a0420..f35a85fba 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -20,12 +20,6 @@ extern "C" { #endif -// motion search site -typedef struct { - MV mv; - int offset; -} search_site; - // Structure to hold snapshot of coding context during the mode picking process typedef struct { MODE_INFO mic; @@ -108,10 +102,6 @@ struct macroblock { int skip_optimize; int q_index; - search_site *ss; - int ss_count; - int searches_per_step; - int errorperbit; int sadperbit16; int sadperbit4; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 6816f555e..395d26aef 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -501,9 +501,9 @@ static void update_frame_size(VP9_COMP *cpi) { int y_stride = cpi->scaled_source.y_stride; if (cpi->sf.search_method == NSTEP) { - vp9_init3smotion_compensation(&cpi->mb, y_stride); + vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride); } else if (cpi->sf.search_method == DIAMOND) { - vp9_init_dsmotion_compensation(&cpi->mb, y_stride); + vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride); } } @@ -782,9 +782,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cm->error.setjmp = 1; - CHECK_MEM_ERROR(cm, cpi->mb.ss, vpx_calloc(sizeof(search_site), - (MAX_MVSEARCH_STEPS * 8) + 1)); - vp9_rtcd(); cpi->use_svc = 0; @@ -973,95 +970,73 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cpi->rd.thresh_freq_fact[i][j] = 32; } -#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \ - SDX3F, SDX8F, SDX4DF)\ +#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF)\ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].sdaf = SDAF; \ cpi->fn_ptr[BT].vf = VF; \ cpi->fn_ptr[BT].svf = SVF; \ cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \ - cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \ - cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \ cpi->fn_ptr[BT].sdx3f = SDX3F; \ cpi->fn_ptr[BT].sdx8f = SDX8F; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; BFP(BLOCK_32X16, vp9_sad32x16, vp9_sad32x16_avg, vp9_variance32x16, vp9_sub_pixel_variance32x16, - vp9_sub_pixel_avg_variance32x16, NULL, NULL, - NULL, NULL, NULL, - vp9_sad32x16x4d) + vp9_sub_pixel_avg_variance32x16, NULL, NULL, vp9_sad32x16x4d) BFP(BLOCK_16X32, vp9_sad16x32, vp9_sad16x32_avg, vp9_variance16x32, vp9_sub_pixel_variance16x32, - vp9_sub_pixel_avg_variance16x32, NULL, NULL, - NULL, NULL, NULL, - vp9_sad16x32x4d) + vp9_sub_pixel_avg_variance16x32, NULL, NULL, vp9_sad16x32x4d) BFP(BLOCK_64X32, vp9_sad64x32, vp9_sad64x32_avg, vp9_variance64x32, vp9_sub_pixel_variance64x32, - vp9_sub_pixel_avg_variance64x32, NULL, NULL, - NULL, NULL, NULL, - vp9_sad64x32x4d) + vp9_sub_pixel_avg_variance64x32, NULL, NULL, vp9_sad64x32x4d) BFP(BLOCK_32X64, vp9_sad32x64, vp9_sad32x64_avg, vp9_variance32x64, vp9_sub_pixel_variance32x64, - vp9_sub_pixel_avg_variance32x64, NULL, NULL, - NULL, NULL, NULL, - vp9_sad32x64x4d) + vp9_sub_pixel_avg_variance32x64, NULL, NULL, vp9_sad32x64x4d) BFP(BLOCK_32X32, vp9_sad32x32, vp9_sad32x32_avg, vp9_variance32x32, vp9_sub_pixel_variance32x32, - vp9_sub_pixel_avg_variance32x32, vp9_variance_halfpixvar32x32_h, - vp9_variance_halfpixvar32x32_v, - vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8, + vp9_sub_pixel_avg_variance32x32, vp9_sad32x32x3, vp9_sad32x32x8, vp9_sad32x32x4d) BFP(BLOCK_64X64, vp9_sad64x64, vp9_sad64x64_avg, vp9_variance64x64, vp9_sub_pixel_variance64x64, - vp9_sub_pixel_avg_variance64x64, vp9_variance_halfpixvar64x64_h, - vp9_variance_halfpixvar64x64_v, - vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8, + vp9_sub_pixel_avg_variance64x64, vp9_sad64x64x3, vp9_sad64x64x8, vp9_sad64x64x4d) BFP(BLOCK_16X16, vp9_sad16x16, vp9_sad16x16_avg, vp9_variance16x16, vp9_sub_pixel_variance16x16, - vp9_sub_pixel_avg_variance16x16, vp9_variance_halfpixvar16x16_h, - vp9_variance_halfpixvar16x16_v, - vp9_variance_halfpixvar16x16_hv, vp9_sad16x16x3, vp9_sad16x16x8, + vp9_sub_pixel_avg_variance16x16, vp9_sad16x16x3, vp9_sad16x16x8, vp9_sad16x16x4d) BFP(BLOCK_16X8, vp9_sad16x8, vp9_sad16x8_avg, vp9_variance16x8, vp9_sub_pixel_variance16x8, - vp9_sub_pixel_avg_variance16x8, NULL, NULL, NULL, + vp9_sub_pixel_avg_variance16x8, vp9_sad16x8x3, vp9_sad16x8x8, vp9_sad16x8x4d) BFP(BLOCK_8X16, vp9_sad8x16, vp9_sad8x16_avg, vp9_variance8x16, vp9_sub_pixel_variance8x16, - vp9_sub_pixel_avg_variance8x16, NULL, NULL, NULL, + vp9_sub_pixel_avg_variance8x16, vp9_sad8x16x3, vp9_sad8x16x8, vp9_sad8x16x4d) BFP(BLOCK_8X8, vp9_sad8x8, vp9_sad8x8_avg, vp9_variance8x8, vp9_sub_pixel_variance8x8, - vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL, + vp9_sub_pixel_avg_variance8x8, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d) BFP(BLOCK_8X4, vp9_sad8x4, vp9_sad8x4_avg, vp9_variance8x4, vp9_sub_pixel_variance8x4, - vp9_sub_pixel_avg_variance8x4, NULL, NULL, - NULL, NULL, vp9_sad8x4x8, - vp9_sad8x4x4d) + vp9_sub_pixel_avg_variance8x4, NULL, vp9_sad8x4x8, vp9_sad8x4x4d) BFP(BLOCK_4X8, vp9_sad4x8, vp9_sad4x8_avg, vp9_variance4x8, vp9_sub_pixel_variance4x8, - vp9_sub_pixel_avg_variance4x8, NULL, NULL, - NULL, NULL, vp9_sad4x8x8, - vp9_sad4x8x4d) + vp9_sub_pixel_avg_variance4x8, NULL, vp9_sad4x8x8, vp9_sad4x8x4d) BFP(BLOCK_4X4, vp9_sad4x4, vp9_sad4x4_avg, vp9_variance4x4, vp9_sub_pixel_variance4x4, - vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL, + vp9_sub_pixel_avg_variance4x4, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d) cpi->full_search_sad = vp9_full_search_sad; @@ -1182,7 +1157,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) { } dealloc_compressor_data(cpi); - vpx_free(cpi->mb.ss); vpx_free(cpi->tok); for (i = 0; i < sizeof(cpi->mbgraph_stats) / @@ -1444,77 +1418,67 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { } #endif -static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb, - YV12_BUFFER_CONFIG *dst_fb) { - const int in_w = src_fb->y_crop_width; - const int in_h = src_fb->y_crop_height; - const int out_w = dst_fb->y_crop_width; - const int out_h = dst_fb->y_crop_height; - const int in_w_uv = src_fb->uv_crop_width; - const int in_h_uv = src_fb->uv_crop_height; - const int out_w_uv = dst_fb->uv_crop_width; - const int out_h_uv = dst_fb->uv_crop_height; +static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst) { + // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t int i; + const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + const int src_widths[4] = {src->y_crop_width, src->uv_crop_width, + src->uv_crop_width, src->y_crop_width}; + const int src_heights[4] = {src->y_crop_height, src->uv_crop_height, + src->uv_crop_height, src->y_crop_height}; + uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer, + dst->alpha_buffer}; + const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride, + dst->alpha_stride}; + const int dst_widths[4] = {dst->y_crop_width, dst->uv_crop_width, + dst->uv_crop_width, dst->y_crop_width}; + const int dst_heights[4] = {dst->y_crop_height, dst->uv_crop_height, + dst->uv_crop_height, dst->y_crop_height}; + + for (i = 0; i < MAX_MB_PLANE; ++i) + vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i], + dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]); - uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer, - src_fb->alpha_buffer}; - int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride, - src_fb->alpha_stride}; - - uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer, - dst_fb->alpha_buffer}; - int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride, - dst_fb->alpha_stride}; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - if (i == 0 || i == 3) { - // Y and alpha planes - vp9_resize_plane(srcs[i], in_h, in_w, src_strides[i], - dsts[i], out_h, out_w, dst_strides[i]); - } else { - // Chroma planes - vp9_resize_plane(srcs[i], in_h_uv, in_w_uv, src_strides[i], - dsts[i], out_h_uv, out_w_uv, dst_strides[i]); - } - } // TODO(hkuang): Call C version explicitly // as neon version only expand border size 32. - vp8_yv12_extend_frame_borders_c(dst_fb); -} - -static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, - YV12_BUFFER_CONFIG *dst_fb) { - const int in_w = src_fb->y_crop_width; - const int in_h = src_fb->y_crop_height; - const int out_w = dst_fb->y_crop_width; - const int out_h = dst_fb->y_crop_height; + vp8_yv12_extend_frame_borders_c(dst); +} + +static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst) { + const int src_w = src->y_crop_width; + const int src_h = src->y_crop_height; + const int dst_w = dst->y_crop_width; + const int dst_h = dst->y_crop_height; + const uint8_t *const srcs[4] = {src->y_buffer, src->u_buffer, src->v_buffer, + src->alpha_buffer}; + const int src_strides[4] = {src->y_stride, src->uv_stride, src->uv_stride, + src->alpha_stride}; + uint8_t *const dsts[4] = {dst->y_buffer, dst->u_buffer, dst->v_buffer, + dst->alpha_buffer}; + const int dst_strides[4] = {dst->y_stride, dst->uv_stride, dst->uv_stride, + dst->alpha_stride}; int x, y, i; - uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer, - src_fb->alpha_buffer}; - int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride, - src_fb->alpha_stride}; - - uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer, - dst_fb->alpha_buffer}; - int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride, - dst_fb->alpha_stride}; - - for (y = 0; y < out_h; y += 16) { - for (x = 0; x < out_w; x += 16) { + for (y = 0; y < dst_h; y += 16) { + for (x = 0; x < dst_w; x += 16) { for (i = 0; i < MAX_MB_PLANE; ++i) { const int factor = (i == 0 || i == 3 ? 1 : 2); - const int x_q4 = x * (16 / factor) * in_w / out_w; - const int y_q4 = y * (16 / factor) * in_h / out_h; + const int x_q4 = x * (16 / factor) * src_w / dst_w; + const int y_q4 = y * (16 / factor) * src_h / dst_h; const int src_stride = src_strides[i]; const int dst_stride = dst_strides[i]; - uint8_t *src = srcs[i] + y / factor * in_h / out_h * src_stride + - x / factor * in_w / out_w; - uint8_t *dst = dsts[i] + y / factor * dst_stride + x / factor; + const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h * + src_stride + (x / factor) * src_w / dst_w; + uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor); - vp9_convolve8(src, src_stride, dst, dst_stride, - vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * in_w / out_w, - vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * in_h / out_h, + vp9_convolve8(src_ptr, src_stride, dst_ptr, dst_stride, + vp9_sub_pel_filters_8[x_q4 & 0xf], 16 * src_w / dst_w, + vp9_sub_pel_filters_8[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor, 16 / factor); } } @@ -1522,7 +1486,7 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, // TODO(hkuang): Call C version explicitly // as neon version only expand border size 32. - vp8_yv12_extend_frame_borders_c(dst_fb); + vp8_yv12_extend_frame_borders_c(dst); } static int find_fp_qindex() { @@ -1701,7 +1665,7 @@ void vp9_scale_references(VP9_COMP *cpi) { for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf; + const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf; if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 8f3249407..132b479e2 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -497,6 +497,8 @@ typedef struct VP9_COMP { int frame_flags; + search_site_config ss_cfg; + #if CONFIG_MULTIPLE_ARF // ARF tracking variables. int multi_arf_enabled; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 34506f2bd..b408ced0e 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -418,7 +418,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, v_fn_ptr.vf = get_block_variance_fn(bsize); // Center the initial step/diamond search on best mv. - tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv, + tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv, step_param, x->sadperbit16, &num00, &v_fn_ptr, ref_mv); if (tmp_err < INT_MAX) @@ -441,7 +441,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (num00) { --num00; } else { - tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv, + tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv, step_param + n, x->sadperbit16, &num00, &v_fn_ptr, ref_mv); if (tmp_err < INT_MAX) diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 89937f5a6..bbec4da76 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -101,32 +101,32 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, return 0; } -void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride) { +void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { int len, ss_count = 1; - x->ss[0].mv.col = x->ss[0].mv.row = 0; - x->ss[0].offset = 0; + cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; + cfg->ss[0].offset = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { // Generate offsets for 4 search sites per step. const MV ss_mvs[] = {{-len, 0}, {len, 0}, {0, -len}, {0, len}}; int i; for (i = 0; i < 4; ++i) { - search_site *const ss = &x->ss[ss_count++]; + search_site *const ss = &cfg->ss[ss_count++]; ss->mv = ss_mvs[i]; ss->offset = ss->mv.row * stride + ss->mv.col; } } - x->ss_count = ss_count; - x->searches_per_step = 4; + cfg->ss_count = ss_count; + cfg->searches_per_step = 4; } -void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { +void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { int len, ss_count = 1; - x->ss[0].mv.col = x->ss[0].mv.row = 0; - x->ss[0].offset = 0; + cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; + cfg->ss[0].offset = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { // Generate offsets for 8 search sites per step. @@ -136,14 +136,14 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { }; int i; for (i = 0; i < 8; ++i) { - search_site *const ss = &x->ss[ss_count++]; + search_site *const ss = &cfg->ss[ss_count++]; ss->mv = ss_mvs[i]; ss->offset = ss->mv.row * stride + ss->mv.col; } } - x->ss_count = ss_count; - x->searches_per_step = 8; + cfg->ss_count = ss_count; + cfg->searches_per_step = 8; } /* @@ -871,7 +871,9 @@ int vp9_fast_dia_search(const MACROBLOCK *x, #undef CHECK_BETTER -int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, +int vp9_full_range_search_c(const MACROBLOCK *x, + const search_site_config *cfg, + MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv) { @@ -962,6 +964,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, } int vp9_diamond_search_sad_c(const MACROBLOCK *x, + const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, @@ -973,8 +976,8 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, // of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = // (MAX_FIRST_STEP/4) pel... etc. - const search_site *const ss = &x->ss[search_param * x->searches_per_step]; - const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; + const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step]; + const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const uint8_t *best_address, *in_what_ref; int best_sad = INT_MAX; @@ -996,7 +999,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, i = 1; for (step = 0; step < tot_steps; step++) { - for (j = 0; j < x->searches_per_step; j++) { + for (j = 0; j < cfg->searches_per_step; j++) { const MV mv = {best_mv->row + ss[i].mv.row, best_mv->col + ss[i].mv.col}; if (is_mv_in(x, &mv)) { @@ -1050,6 +1053,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, } int vp9_diamond_search_sadx4(const MACROBLOCK *x, + const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, @@ -1075,8 +1079,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, // 0 = initial step (MAX_FIRST_STEP) pel // 1 = (MAX_FIRST_STEP/2) pel, // 2 = (MAX_FIRST_STEP/4) pel... - const search_site *ss = &x->ss[search_param * x->searches_per_step]; - const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; + const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; + const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); @@ -1112,7 +1116,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (all_in) { unsigned int sad_array[4]; - for (j = 0; j < x->searches_per_step; j += 4) { + for (j = 0; j < cfg->searches_per_step; j += 4) { unsigned char const *block_offset[4]; for (t = 0; t < 4; t++) @@ -1135,7 +1139,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, } } } else { - for (j = 0; j < x->searches_per_step; j++) { + for (j = 0; j < cfg->searches_per_step; j++) { // Trap illegal vectors const MV this_mv = {best_mv->row + ss[i].mv.row, best_mv->col + ss[i].mv.col}; @@ -1202,7 +1206,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, const MV *ref_mv, MV *dst_mv) { MV temp_mv; int thissme, n, num00 = 0; - int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, + int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, step_param, sadpb, &n, fn_ptr, ref_mv); if (bestsme < INT_MAX) @@ -1220,7 +1224,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, if (num00) { num00--; } else { - thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, + thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, step_param + n, sadpb, &num00, fn_ptr, ref_mv); if (thissme < INT_MAX) @@ -1290,192 +1294,154 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv, MV *best_mv) { + int r; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *const what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - MV this_mv; - unsigned int bestsad = INT_MAX; - int r, c; - unsigned int thissad; - int ref_row = ref_mv->row; - int ref_col = ref_mv->col; - - // Apply further limits to prevent us looking using vectors that stretch - // beyond the UMV border - const int row_min = MAX(ref_row - distance, x->mv_row_min); - const int row_max = MIN(ref_row + distance, x->mv_row_max); - const int col_min = MAX(ref_col - distance, x->mv_col_min); - const int col_max = MIN(ref_col + distance, x->mv_col_max); - unsigned int sad_array[3]; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); + const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); + const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); + const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); + *best_mv = *ref_mv; - // Work out the mid point for the search - const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; - - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); - - for (r = row_min; r < row_max; r++) { - const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; - this_mv.row = r; - c = col_min; - - while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { - int i; + for (r = row_min; r < row_max; ++r) { + int c = col_min; + const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; - fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); + if (fn_ptr->sdx3f != NULL) { + while ((c + 2) < col_max) { + int i; + unsigned int sads[3]; - for (i = 0; i < 3; i++) { - thissad = sad_array[i]; + fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, + sads); - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + for (i = 0; i < 3; ++i) { + unsigned int sad = sads[i]; + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } } + ++check_here; + ++c; } - check_here++; - c++; } } while (c < col_max) { - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + check_here, in_what->stride, best_sad); + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; } } - - check_here++; - c++; + ++check_here; + ++c; } } - return bestsad; + + return best_sad; } int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv, MV *best_mv) { + int r; const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *const what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *const in_what = xd->plane[0].pre[0].buf; - const int in_what_stride = xd->plane[0].pre[0].stride; - MV this_mv; - unsigned int bestsad = INT_MAX; - int r, c; - int ref_row = ref_mv->row; - int ref_col = ref_mv->col; - - // Apply further limits to prevent us looking using vectors that stretch - // beyond the UMV border - const int row_min = MAX(ref_row - distance, x->mv_row_min); - const int row_max = MIN(ref_row + distance, x->mv_row_max); - const int col_min = MAX(ref_col - distance, x->mv_col_min); - const int col_max = MIN(ref_col + distance, x->mv_col_max); - DECLARE_ALIGNED_ARRAY(16, uint32_t, sad_array8, 8); - unsigned int sad_array[3]; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const int row_min = MAX(ref_mv->row - distance, x->mv_row_min); + const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); + const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); + const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); + *best_mv = *ref_mv; - // Work out the mid point for the search - const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; - - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Baseline value at the center - bestsad = fn_ptr->sdf(what, what_stride, - bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); - - for (r = row_min; r < row_max; r++) { - const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; - this_mv.row = r; - c = col_min; - - while ((c + 7) < col_max) { - int i; + for (r = row_min; r < row_max; ++r) { + int c = col_min; + const uint8_t *check_here = &in_what->buf[r * in_what->stride + c]; - fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); + if (fn_ptr->sdx8f != NULL) { + while ((c + 7) < col_max) { + int i; + unsigned int sads[8]; - for (i = 0; i < 8; i++) { - unsigned int thissad = (unsigned int)sad_array8[i]; + fn_ptr->sdx8f(what->buf, what->stride, check_here, in_what->stride, + sads); - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + for (i = 0; i < 8; ++i) { + unsigned int sad = sads[i]; + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } } + ++check_here; + ++c; } - - check_here++; - c++; } } - while ((c + 2) < col_max && fn_ptr->sdx3f != NULL) { - int i; + if (fn_ptr->sdx3f != NULL) { + while ((c + 2) < col_max) { + int i; + unsigned int sads[3]; - fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); + fn_ptr->sdx3f(what->buf, what->stride, check_here, in_what->stride, + sads); - for (i = 0; i < 3; i++) { - unsigned int thissad = sad_array[i]; - - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + for (i = 0; i < 3; ++i) { + unsigned int sad = sads[i]; + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } } + ++check_here; + ++c; } - - check_here++; - c++; } } while (c < col_max) { - unsigned int thissad = fn_ptr->sdf(what, what_stride, - check_here, in_what_stride, bestsad); - - if (thissad < bestsad) { - this_mv.col = c; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + check_here, in_what->stride, best_sad); + if (sad < best_sad) { + const MV mv = {r, c}; + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; } } - - check_here++; - c++; + ++check_here; + ++c; } } - return bestsad; + + return best_sad; } int vp9_refining_search_sad_c(const MACROBLOCK *x, diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 70d7985e4..1f524f1f6 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -31,6 +31,20 @@ extern "C" { // for Block_16x16 #define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND) +// motion search site +typedef struct search_site { + MV mv; + int offset; +} search_site; + +typedef struct search_site_config { + search_site ss[8 * MAX_MVSEARCH_STEPS + 1]; + int ss_count; + int searches_per_step; +} search_site_config; + +void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride); +void vp9_init3smotion_compensation(search_site_config *cfg, int stride); void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv); int vp9_mv_bit_cost(const MV *mv, const MV *ref, @@ -46,8 +60,6 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x, const uint8_t *second_pred, const vp9_variance_fn_ptr_t *vfp, int use_mvcost); -void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride); -void vp9_init3smotion_compensation(MACROBLOCK *x, int stride); struct VP9_COMP; int vp9_init_search_range(struct VP9_COMP *cpi, int size); @@ -119,6 +131,7 @@ typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x, const MV *center_mv); typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x, + const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index c1493e719..56eb9440c 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -418,7 +418,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Perform intra prediction search, if the best SAD is above a certain // threshold. - if (best_rd > inter_mode_thresh) { + if (best_rd > inter_mode_thresh && bsize < cpi->sf.max_intra_bsize) { for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { vp9_predict_intra_block(xd, 0, b_width_log2(bsize), mbmi->tx_size, this_mode, diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 1399bfb7e..ae3c86aee 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -276,126 +276,6 @@ VAR(64, 64) SUBPIX_VAR(64, 64) SUBPIX_AVG_VAR(64, 64) -unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8, - ref_ptr, recon_stride, sse); -} - -unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const uint8_t *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, - xoffset, yoffset, dst_ptr, - dst_pixels_per_line, sse); - return *sse; -} - -unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const uint8_t *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, - xoffset, yoffset, dst_ptr, - dst_pixels_per_line, sse); - return *sse; -} - -unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - const uint8_t *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line, - xoffset, yoffset, dst_ptr, - dst_pixels_per_line, sse); - return *sse; -} - void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { int i, j; diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 4c8be71cd..152c3d962 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -82,9 +82,6 @@ typedef struct vp9_variance_vtable { vp9_variance_fn_t vf; vp9_subpixvariance_fn_t svf; vp9_subp_avg_variance_fn_t svaf; - vp9_variance_fn_t svf_halfpix_h; - vp9_variance_fn_t svf_halfpix_v; - vp9_variance_fn_t svf_halfpix_hv; vp9_sad_multi_fn_t sdx3f; vp9_sad_multi_fn_t sdx8f; vp9_sad_multi_d_fn_t sdx4df; diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c index 9e65694a8..25d594632 100644 --- a/vp9/encoder/x86/vp9_variance_sse2.c +++ b/vp9/encoder/x86/vp9_variance_sse2.c @@ -494,58 +494,3 @@ FNS(ssse3, ssse3); #undef FNS #undef FN - -unsigned int vp9_variance_halfpixvar16x16_h_sse2( - const unsigned char *src_ptr, - int src_pixels_per_line, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0; - unsigned int xxsum0; - - vp9_half_horiz_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - - *sse = xxsum0; - return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); -} - - -unsigned int vp9_variance_halfpixvar16x16_v_sse2( - const unsigned char *src_ptr, - int src_pixels_per_line, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0; - unsigned int xxsum0; - vp9_half_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - - *sse = xxsum0; - return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); -} - - -unsigned int vp9_variance_halfpixvar16x16_hv_sse2( - const unsigned char *src_ptr, - int src_pixels_per_line, - const unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse) { - int xsum0; - unsigned int xxsum0; - - vp9_half_horiz_vert_variance16x_h_sse2( - src_ptr, src_pixels_per_line, - dst_ptr, dst_pixels_per_line, 16, - &xsum0, &xxsum0); - - *sse = xxsum0; - return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); -} |