From 14d91ac515bb06444534ce35f6fefb5f3b925de3 Mon Sep 17 00:00:00 2001 From: Angie Chiang Date: Mon, 10 Dec 2018 15:29:39 -0800 Subject: Use motion field for mv inconsistency in mv search Change-Id: I25ea05f4bfe3c6f420e967c33763909c979a0d1b --- vp9/encoder/vp9_mcomp.h | 25 +++++++++++++++++ vp9/encoder/vp9_rdopt.c | 74 +++++++++++++------------------------------------ 2 files changed, 44 insertions(+), 55 deletions(-) diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index ab69afdcd..ab880ff9c 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -148,6 +148,31 @@ struct TplDepFrame; void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row, int mi_col, int rf_idx, BLOCK_SIZE bsize, int_mv *nb_full_mvs); + +static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) { + BLOCK_SIZE square_bsize; + switch (bsize) { + case BLOCK_4X4: + case BLOCK_4X8: + case BLOCK_8X4: square_bsize = BLOCK_4X4; break; + case BLOCK_8X8: + case BLOCK_8X16: + case BLOCK_16X8: square_bsize = BLOCK_8X8; break; + case BLOCK_16X16: + case BLOCK_16X32: + case BLOCK_32X16: square_bsize = BLOCK_16X16; break; + case BLOCK_32X32: + case BLOCK_32X64: + case BLOCK_64X32: + case BLOCK_64X64: square_bsize = BLOCK_32X32; break; + default: + square_bsize = BLOCK_INVALID; + printf("ERROR: invlid block size %d\n", bsize); + assert(0); + break; + } + return square_bsize; +} #endif // CONFIG_NON_GREEDY_MV #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index debe88f9d..c1a079ff0 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2316,59 +2316,20 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, } #if CONFIG_NON_GREEDY_MV -#define MAX_PREV_NB_FULL_MV_NUM 8 -static int find_prev_nb_full_mvs(const VP9_COMMON *cm, const MACROBLOCKD *xd, - int ref_frame, BLOCK_SIZE bsize, int mi_row, - int mi_col, int_mv *nb_full_mvs) { - int i; - const TileInfo *tile = &xd->tile; - int 
full_mv_num = 0; - assert(bsize >= BLOCK_8X8); - for (i = 0; i < MVREF_NEIGHBOURS; ++i) { - const POSITION *mv_ref = &mv_ref_blocks[bsize][i]; - if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { - const MODE_INFO *nb_mi = - xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; - if (nb_mi->sb_type >= BLOCK_8X8) { - if (nb_mi->ref_frame[0] == ref_frame) { - nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[0].as_mv); - ++full_mv_num; - if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { - return full_mv_num; - } - } else if (nb_mi->ref_frame[1] == ref_frame) { - nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[1].as_mv); - ++full_mv_num; - if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { - return full_mv_num; - } - } - } else { - int j; - for (j = 0; j < 4; ++j) { - // TODO(angiebird): avoid using duplicated mvs - if (nb_mi->ref_frame[0] == ref_frame) { - nb_full_mvs[full_mv_num].as_mv = - get_full_mv(&nb_mi->bmi[j].as_mv[0].as_mv); - ++full_mv_num; - if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { - return full_mv_num; - } - } else if (nb_mi->ref_frame[1] == ref_frame) { - nb_full_mvs[full_mv_num].as_mv = - get_full_mv(&nb_mi->bmi[j].as_mv[1].as_mv); - ++full_mv_num; - if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { - return full_mv_num; - } - } - } - } - } +static int ref_frame_to_gf_rf_idx(int ref_frame) { + if (ref_frame == GOLDEN_FRAME) { + return 0; + } + if (ref_frame == LAST_FRAME) { + return 1; } - return full_mv_num; + if (ref_frame == ALTREF_FRAME) { + return 2; + } + assert(0); + return -1; } -#endif // CONFIG_NON_GREEDY_MV +#endif static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, @@ -2395,10 +2356,13 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, double mv_cost = 0; double lambda = (pw * ph) / 4; double bestsme; - int_mv nb_full_mvs[MAX_PREV_NB_FULL_MV_NUM]; - - const int nb_full_mv_num = - find_prev_nb_full_mvs(cm, xd, ref, bsize, mi_row, 
mi_col, nb_full_mvs); + int_mv nb_full_mvs[NB_MVS_NUM]; + const int nb_full_mv_num = NB_MVS_NUM; + int gf_group_idx = cpi->twopass.gf_group.index; + int gf_rf_idx = ref_frame_to_gf_rf_idx(ref); + BLOCK_SIZE square_bsize = get_square_block_size(bsize); + vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col, + gf_rf_idx, square_bsize, nb_full_mvs); #else // CONFIG_NON_GREEDY_MV int bestsme = INT_MAX; int sadpb = x->sadperbit16; -- cgit v1.2.3 From 66bbd53882db448b76c4b04a357efdd23f80cf30 Mon Sep 17 00:00:00 2001 From: Angie Chiang Date: Tue, 1 Jan 2019 07:28:20 -0800 Subject: Add sse cost in vp9_full_pixel_diamond_new Change-Id: I63614e652686557652985bde882889eea9ecbcad --- vp9/encoder/vp9_mcomp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 5a6717ab2..a2385a24c 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -2279,11 +2279,14 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, double thissme; double bestsme; const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; + const MV center_mv = { 0, 0 }; vpx_clear_system_state(); bestsme = vp9_diamond_search_sad_new( x, &cpi->ss_cfg, mvp_full, best_mv, best_mv_dist, best_mv_cost, step_param, lambda, &n, fn_ptr, nb_full_mvs, full_mv_num); + bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0); + // If there won't be more n-step search, check to see if refining search is // needed. if (n > further_steps) do_refine = 0; @@ -2299,6 +2302,7 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, thissme = vp9_diamond_search_sad_new( x, &cpi->ss_cfg, mvp_full, &temp_mv, &mv_dist, &mv_cost, step_param + n, lambda, &num00, fn_ptr, nb_full_mvs, full_mv_num); + thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0); // check to see if refining search is needed.
if (num00 > further_steps - n) do_refine = 0; @@ -2320,6 +2324,7 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, thissme = vp9_refining_search_sad_new(x, &temp_mv, &mv_dist, &mv_cost, lambda, search_range, fn_ptr, nb_full_mvs, full_mv_num); + thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0); if (thissme < bestsme) { bestsme = thissme; *best_mv = temp_mv; -- cgit v1.2.3 From fc165fbe00f546dd47755528eb790f7ec23ebb61 Mon Sep 17 00:00:00 2001 From: Angie Chiang Date: Thu, 3 Jan 2019 20:48:12 -0800 Subject: Add full_pixel_exhaustive_new Add full_pixel_exhaustive_new() and exhuastive_mesh_search_new(). The two functions are variants from full_pixel_exhaustive() and exhuastive_mesh_search(). In the new versions, we use mv inconsistency in place of mv entropy cost. Change-Id: Icec98e6fae24f2771806a3e78276734624ec0303 --- vp9/encoder/vp9_mcomp.c | 155 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 152 insertions(+), 3 deletions(-) diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index a2385a24c..05f3f2857 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1732,6 +1732,9 @@ static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, return best_sad; } +#define MIN_RANGE 7 +#define MAX_RANGE 256 +#define MIN_INTERVAL 1 #if CONFIG_NON_GREEDY_MV double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, int mv_num) { @@ -1757,6 +1760,152 @@ double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, return best_cost; } +static double exhuastive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, + int range, int step, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *center_mv, double lambda, + const int_mv *nb_full_mvs, + int full_mv_num) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + MV fcenter_mv = { center_mv->row, center_mv->col }; +
double best_sad; + int r, c, i; + int start_col, end_col, start_row, end_row; + int col_step = (step > 1) ? step : 4; + + assert(step >= 1); + + clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, + x->mv_limits.row_min, x->mv_limits.row_max); + *best_mv = fcenter_mv; + best_sad = + fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) + + lambda * vp9_nb_mvs_inconsistency(&fcenter_mv, nb_full_mvs, full_mv_num); + start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row); + start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col); + end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row); + end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col); + + for (r = start_row; r <= end_row; r += step) { + for (c = start_col; c <= end_col; c += col_step) { + // Step > 1 means we are not checking every location in this pass. + if (step > 1) { + const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; + double sad = + fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), + in_what->stride); + if (sad < best_sad) { + sad += + lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } else { + // 4 sads in a single call if we are checking every location + if (c + 3 <= end_col) { + unsigned int sads[4]; + const uint8_t *addrs[4]; + for (i = 0; i < 4; ++i) { + const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; + addrs[i] = get_buf_from_mv(in_what, &mv); + } + fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); + + for (i = 0; i < 4; ++i) { + if (sads[i] < best_sad) { + const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; + const double sad = + sads[i] + lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, + full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } + } else { + for (i = 0; i < end_col - c; ++i) { + const MV mv = { 
fcenter_mv.row + r, fcenter_mv.col + c + i }; + double sad = + fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride); + if (sad < best_sad) { + sad += lambda * + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } + } + } + } + } + + return best_sad; +} + +static double full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x, + MV *centre_mv_full, + const vp9_variance_fn_ptr_t *fn_ptr, + MV *dst_mv, double lambda, + const int_mv *nb_full_mvs, + int full_mv_num) { + const SPEED_FEATURES *const sf = &cpi->sf; + MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; + double bestsme; + int i; + int interval = sf->mesh_patterns[0].interval; + int range = sf->mesh_patterns[0].range; + int baseline_interval_divisor; + const MV dummy_mv = { 0, 0 }; + + // Trap illegal values for interval and range for this function. + if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || + (interval > range)) { + printf("ERROR: invalid range\n"); + assert(0); + } + + baseline_interval_divisor = range / interval; + + // Check size of proposed first range against magnitude of the centre + // value used as a starting point. + range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); + range = VPXMIN(range, MAX_RANGE); + interval = VPXMAX(interval, range / baseline_interval_divisor); + + // initial search + bestsme = + exhuastive_mesh_search_new(x, &temp_mv, range, interval, fn_ptr, &temp_mv, + lambda, nb_full_mvs, full_mv_num); + + if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { + // Progressive searches with range and step size decreasing each time + // till we reach a step size of 1. Then break out. 
+ for (i = 1; i < MAX_MESH_STEP; ++i) { + // First pass with coarser step and longer range + bestsme = exhuastive_mesh_search_new( + x, &temp_mv, sf->mesh_patterns[i].range, + sf->mesh_patterns[i].interval, fn_ptr, &temp_mv, lambda, nb_full_mvs, + full_mv_num); + + if (sf->mesh_patterns[i].interval == 1) break; + } + } + + bestsme = vp9_get_mvpred_var(x, &temp_mv, &dummy_mv, fn_ptr, 0); + *dst_mv = temp_mv; + + return bestsme; +} + double vp9_diamond_search_sad_new(const MACROBLOCK *x, const search_site_config *cfg, const MV *init_full_mv, MV *best_full_mv, @@ -2332,6 +2481,9 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, *best_mv_cost = mv_cost; } } + + bestsme = full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda, + nb_full_mvs, full_mv_num); return bestsme; } #endif // CONFIG_NON_GREEDY_MV @@ -2400,9 +2552,6 @@ static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, return bestsme; } -#define MIN_RANGE 7 -#define MAX_RANGE 256 -#define MIN_INTERVAL 1 // Runs an limited range exhaustive mesh search using a pattern set // according to the encode speed profile. static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, -- cgit v1.2.3