diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 15 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 48 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 17 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.c | 23 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 74 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 10 |
6 files changed, 134 insertions, 53 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index a73185623..1b3010c62 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -3873,6 +3873,9 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, set_size_independent_vars(cpi); set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); + // search method and step parameter might be changed in speed settings. + init_motion_estimation(cpi); + if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi); if (cpi->sf.svc_use_lowres_part && @@ -5598,8 +5601,7 @@ static void prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row, &tpl_frame ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c]; if (tpl_ptr->ready[rf_idx]) { - nb_full_mvs[i].as_mv.row = tpl_ptr->mv_arr[rf_idx].as_mv.row >> 3; - nb_full_mvs[i].as_mv.col = tpl_ptr->mv_arr[rf_idx].as_mv.col >> 3; + nb_full_mvs[i].as_mv = get_full_mv(&tpl_ptr->mv_arr[rf_idx].as_mv); } else { nb_full_mvs[i].as_int = INVALID_MV; } @@ -5666,7 +5668,7 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, nb_full_mvs); vp9_full_pixel_diamond_new( cpi, x, &best_ref_mv1_full, step_param, lambda, 1, &cpi->fn_ptr[bsize], - nb_full_mvs, &tpl_stats->mv_arr[rf_idx].as_mv, + nb_full_mvs, NB_MVS_NUM, &tpl_stats->mv_arr[rf_idx].as_mv, &tpl_stats->mv_dist[rf_idx], &tpl_stats->mv_cost[rf_idx]); #else (void)frame_idx; @@ -5973,8 +5975,7 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride, xd->bd); highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); - // TODO(sdeng): Implement SIMD based high bit-depth satd. - intra_cost = vpx_satd_c(coeff, pix_num); + intra_cost = vpx_highbd_satd(coeff, pix_num); } else { vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride); @@ -6020,7 +6021,7 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw, xd->bd); highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); - inter_cost = vpx_satd_c(coeff, pix_num); + inter_cost = vpx_highbd_satd(coeff, pix_num); } else { vp9_build_inter_predictor( ref_frame[rf_idx]->y_buffer + mb_y_offset, @@ -6361,7 +6362,7 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, full_mv.row = this_tpl_stats->mv_arr[rf_idx].as_mv.row >> 3; full_mv.col = this_tpl_stats->mv_arr[rf_idx].as_mv.col >> 3; this_tpl_stats->mv_cost[rf_idx] = - av1_nb_mvs_inconsistency(&full_mv, nb_full_mvs); + vp9_nb_mvs_inconsistency(&full_mv, nb_full_mvs, NB_MVS_NUM); #endif // RE_COMPUTE_MV_INCONSISTENCY tpl_frame->mv_dist_sum[rf_idx] += this_tpl_stats->mv_dist[rf_idx]; tpl_frame->mv_cost_sum[rf_idx] += this_tpl_stats->mv_cost[rf_idx]; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 235f0345e..316227e3c 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1733,12 +1733,13 @@ static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, } #if CONFIG_NON_GREEDY_MV -double av1_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs) { +double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, + int mv_num) { int i; int update = 0; double best_cost = 0; vpx_clear_system_state(); - for (i = 0; i < NB_MVS_NUM; ++i) { + for (i = 0; i < mv_num; ++i) { if (nb_mvs[i].as_int != INVALID_MV) { MV nb_mv = nb_mvs[i].as_mv; const double row_diff = mv->row - nb_mv.row; @@ -1762,7 +1763,7 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, double *best_mv_dist, double *best_mv_cost, int search_param, double lambda, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs) { + const int_mv *nb_full_mvs, int full_mv_num) { int i, j, step; const MACROBLOCKD *const xd = &x->e_mbd; @@ -1799,7 +1800,8 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, // Check the starting position *best_mv_dist = fn_ptr->sdf(what, what_stride, in_what, in_what_stride); - *best_mv_cost = av1_nb_mvs_inconsistency(best_full_mv, nb_full_mvs); + *best_mv_cost = + vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); bestsad = (*best_mv_dist) + lambda * (*best_mv_cost); i = 0; @@ -1833,7 +1835,7 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, best_full_mv->col + ss_mv[i].col }; const double mv_dist = sad_array[t]; const double mv_cost = - av1_nb_mvs_inconsistency(&this_mv, nb_full_mvs); + vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); double thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; @@ -1854,7 +1856,7 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, const double mv_dist = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); const double mv_cost = - av1_nb_mvs_inconsistency(&this_mv, nb_full_mvs); + vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); double thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; @@ -2242,16 +2244,17 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, double lambda, int do_refine, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, MV *best_mv, - double *best_mv_dist, double *best_mv_cost) { + const int_mv *nb_full_mvs, int full_mv_num, + MV *best_mv, double *best_mv_dist, + double *best_mv_cost) { int n, num00 = 0; double thissme; double bestsme; const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; vpx_clear_system_state(); - bestsme = vp9_diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, - best_mv_dist, best_mv_cost, step_param, - lambda, &n, fn_ptr, nb_full_mvs); + bestsme = vp9_diamond_search_sad_new( + x, &cpi->ss_cfg, mvp_full, best_mv, best_mv_dist, best_mv_cost, + step_param, lambda, &n, fn_ptr, nb_full_mvs, full_mv_num); // If there won't be more n-step search, check to see if refining search is // needed. @@ -2265,9 +2268,9 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV temp_mv; double mv_dist; double mv_cost; - thissme = vp9_diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv, - &mv_dist, &mv_cost, step_param + n, - lambda, &num00, fn_ptr, nb_full_mvs); + thissme = vp9_diamond_search_sad_new( + x, &cpi->ss_cfg, mvp_full, &temp_mv, &mv_dist, &mv_cost, + step_param + n, lambda, &num00, fn_ptr, nb_full_mvs, full_mv_num); // check to see if refining search is needed. if (num00 > further_steps - n) do_refine = 0; @@ -2286,9 +2289,9 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV temp_mv = *best_mv; double mv_dist; double mv_cost; - thissme = - vp9_refining_search_sad_new(x, &temp_mv, &mv_dist, &mv_cost, lambda, - search_range, fn_ptr, nb_full_mvs); + thissme = vp9_refining_search_sad_new(x, &temp_mv, &mv_dist, &mv_cost, + lambda, search_range, fn_ptr, + nb_full_mvs, full_mv_num); if (thissme < bestsme) { bestsme = thissme; *best_mv = temp_mv; @@ -2428,7 +2431,7 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, double *best_mv_dist, double *best_mv_cost, double lambda, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs) { + const int_mv *nb_full_mvs, int full_mv_num) { const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; const struct buf_2d *const what = &x->plane[0].src; @@ -2439,7 +2442,8 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, vpx_clear_system_state(); *best_mv_dist = fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride); - *best_mv_cost = av1_nb_mvs_inconsistency(best_full_mv, nb_full_mvs); + *best_mv_cost = + vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); best_sad = (*best_mv_dist) + lambda * (*best_mv_cost); for (i = 0; i < search_range; i++) { @@ -2461,7 +2465,8 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, const MV mv = { best_full_mv->row + neighbors[j].row, best_full_mv->col + neighbors[j].col }; const double mv_dist = sads[j]; - const double mv_cost = av1_nb_mvs_inconsistency(&mv, nb_full_mvs); + const double mv_cost = + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); const double thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { best_sad = thissad; @@ -2479,7 +2484,8 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, const double mv_dist = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride); - const double mv_cost = av1_nb_mvs_inconsistency(&mv, nb_full_mvs); + const double mv_cost = + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); const double thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { best_sad = thissad; diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 54f68ca74..6d89fdfdd 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -126,16 +126,23 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, double *best_mv_dist, double *best_mv_cost, double lambda, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs); + const int_mv *nb_full_mvs, int full_mv_num); double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, double lambda, int do_refine, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, MV *best_mv, - double *best_mv_dist, double *best_mv_cost); - -double av1_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs); + const int_mv *nb_full_mvs, int full_mv_num, + MV *best_mv, double *best_mv_dist, + double *best_mv_cost); + +double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, int mv_num); +static INLINE MV get_full_mv(const MV *mv) { + MV out_mv; + out_mv.row = mv->row >> 3; + out_mv.col = mv->col >> 3; + return out_mv; +} #endif // CONFIG_NON_GREEDY_MV #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 18b74f57b..c01e5f81b 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -176,8 +176,27 @@ static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128, int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) { // largest dc_quant is 21387, therefore rdmult should always fit in int32_t const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth); - int rdmult = q * q; - rdmult = rdmult * 3 + (rdmult * 2 / 3); + uint32_t rdmult = q * q; + + if (cpi->common.frame_type != KEY_FRAME) { + if (qindex < 1) + rdmult = rdmult * 3 + (rdmult * 2 / 3); + else if (qindex < 128) + rdmult = rdmult * 4; + else if (qindex < 190) + rdmult = rdmult * 4 + rdmult / 2; + else + rdmult = rdmult * 3; + } else { + if (qindex < 64) + rdmult = rdmult * 4; + else if (qindex <= 128) + rdmult = rdmult * 3 + rdmult / 2; + else if (qindex < 190) + rdmult = rdmult * 4 + rdmult / 2; + else + rdmult = rdmult * 7 + rdmult / 2; + } #if CONFIG_VP9_HIGHBITDEPTH switch (cpi->common.bit_depth) { case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index b55e2ddb4..24c500fbd 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2315,6 +2315,61 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, block_size); } +#if CONFIG_NON_GREEDY_MV +#define MAX_PREV_NB_FULL_MV_NUM 8 +static int find_prev_nb_full_mvs(const VP9_COMMON *cm, const MACROBLOCKD *xd, + int ref_frame, BLOCK_SIZE bsize, int mi_row, + int mi_col, int_mv *nb_full_mvs) { + int i; + const TileInfo *tile = &xd->tile; + int full_mv_num = 0; + assert(bsize >= BLOCK_8X8); + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *mv_ref = &mv_ref_blocks[bsize][i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *nb_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + if (nb_mi->sb_type >= BLOCK_8X8) { + if (nb_mi->ref_frame[0] == ref_frame) { + nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[0].as_mv); + ++full_mv_num; + if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { + return full_mv_num; + } + } else if (nb_mi->ref_frame[1] == ref_frame) { + nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[1].as_mv); + ++full_mv_num; + if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { + return full_mv_num; + } + } + } else { + int j; + for (j = 0; j < 4; ++j) { + // TODO(angiebird): avoid using duplicated mvs + if (nb_mi->ref_frame[0] == ref_frame) { + nb_full_mvs[full_mv_num].as_mv = + get_full_mv(&nb_mi->bmi[j].as_mv[0].as_mv); + ++full_mv_num; + if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { + return full_mv_num; + } + } else if (nb_mi->ref_frame[1] == ref_frame) { + nb_full_mvs[full_mv_num].as_mv = + get_full_mv(&nb_mi->bmi[j].as_mv[1].as_mv); + ++full_mv_num; + if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { + return full_mv_num; + } + } + } + } + } + } + return full_mv_num; +} +#endif // CONFIG_NON_GREEDY_MV + static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { @@ -2338,11 +2393,12 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, #if CONFIG_NON_GREEDY_MV double mv_dist = 0; double mv_cost = 0; - double lambda = 0; + double lambda = (pw * ph) / 4; double bestsme; - int_mv nb_full_mvs[NB_MVS_NUM]; - // TODO(angiebird): Set nb_full_mvs properly. - vp9_zero(nb_full_mvs); + int_mv nb_full_mvs[MAX_PREV_NB_FULL_MV_NUM]; + + const int nb_full_mv_num = + find_prev_nb_full_mvs(cm, xd, ref, bsize, mi_row, mi_col, nb_full_mvs); #else // CONFIG_NON_GREEDY_MV int bestsme = INT_MAX; int sadpb = x->sadperbit16; @@ -2418,9 +2474,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, mvp_full.row >>= 3; #if CONFIG_NON_GREEDY_MV - bestsme = vp9_full_pixel_diamond_new(cpi, x, &mvp_full, step_param, lambda, 1, - &cpi->fn_ptr[bsize], nb_full_mvs, - &tmp_mv->as_mv, &mv_dist, &mv_cost); + bestsme = vp9_full_pixel_diamond_new( + cpi, x, &mvp_full, step_param, lambda, 1, &cpi->fn_ptr[bsize], + nb_full_mvs, nb_full_mv_num, &tmp_mv->as_mv, &mv_dist, &mv_cost); #else // CONFIG_NON_GREEDY_MV bestsme = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb, @@ -2461,8 +2517,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, #if CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_diamond_new( cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), - lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, &this_mv, &mv_dist, - &mv_cost); + lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num, &this_mv, + &mv_dist, &mv_cost); #else // CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 9b6c69a73..4f3d470d6 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -710,7 +710,6 @@ static void set_rt_speed_feature_framesize_independent( // For SVC: enable use of lower resolution partition for higher resolution, // only for 3 spatial layers and when config/top resolution is above VGA. // Enable only for non-base temporal layer frames. - // TODO(jianj): Investigate webm:1578 if (cpi->use_svc && cpi->svc.use_partition_reuse && cpi->svc.number_spatial_layers == 3 && cpi->svc.temporal_layer_id > 0 && cpi->oxcf.width * cpi->oxcf.height > 640 * 480) @@ -797,18 +796,11 @@ static void set_rt_speed_feature_framesize_independent( } // Special case for screen content: increase motion search on base spatial // layer when high motion is detected or previous SL0 frame was dropped. - // Avoid speed 5 for as there is an issue with SVC datarate test. - // TODO(marpan/jianj): Investigate issue at speed 5. - if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed > 5 && + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed >= 5 && cpi->svc.spatial_layer_id == 0 && (cpi->rc.high_num_blocks_with_motion || cpi->svc.last_layer_dropped[0])) { sf->mv.search_method = NSTEP; sf->mv.fullpel_search_step_param = 2; - // TODO(marpan/jianj): Investigate issue for lower setting of step_param - // for spatial layers (namely on lower layers). - if (cpi->use_svc && cm->width != cpi->oxcf.width && - cm->height != cpi->oxcf.height) - sf->mv.fullpel_search_step_param = 4; } } |