diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 74 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 32 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 10 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 28 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 17 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 2 |
7 files changed, 116 insertions, 52 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 1b3010c62..df4223a23 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2377,6 +2377,19 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, #endif // TODO(jingning): Reduce the actual memory use for tpl model build up. for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { +#if CONFIG_NON_GREEDY_MV + int sqr_bsize; + int rf_idx; + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) { + CHECK_MEM_ERROR( + cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize], + vpx_calloc(mi_rows * mi_cols, + sizeof(*cpi->tpl_stats[frame] + .pyramid_mv_arr[rf_idx][sqr_bsize]))); + } + } +#endif CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr, vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr))); @@ -5586,39 +5599,12 @@ void init_tpl_stats(VP9_COMP *cpi) { } #if CONFIG_NON_GREEDY_MV -static void prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row, - int mi_col, int rf_idx, BLOCK_SIZE bsize, - int_mv *nb_full_mvs) { - const int mi_unit = num_8x8_blocks_wide_lookup[bsize]; - const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; - int i; - for (i = 0; i < NB_MVS_NUM; ++i) { - int r = dirs[i][0] * mi_unit; - int c = dirs[i][1] * mi_unit; - if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 && - mi_col + c < tpl_frame->mi_cols) { - const TplDepStats *tpl_ptr = - &tpl_frame - ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c]; - if (tpl_ptr->ready[rf_idx]) { - nb_full_mvs[i].as_mv = get_full_mv(&tpl_ptr->mv_arr[rf_idx].as_mv); - } else { - nb_full_mvs[i].as_int = INVALID_MV; - } - } else { - nb_full_mvs[i].as_int = INVALID_MV; - } - } -} -#endif - -#if CONFIG_NON_GREEDY_MV uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, int frame_idx, uint8_t *cur_frame_buf, uint8_t *ref_frame_buf, int stride, BLOCK_SIZE bsize, int mi_row, int mi_col, - TplDepStats *tpl_stats, int rf_idx) { - MV *mv = &tpl_stats->mv_arr[rf_idx].as_mv; + MV *mv, int rf_idx, double *mv_dist, + double *mv_cost) { #else // CONFIG_NON_GREEDY_MV uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, int frame_idx, uint8_t *cur_frame_buf, @@ -5664,12 +5650,11 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, #if CONFIG_NON_GREEDY_MV (void)search_method; (void)sadpb; - prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx, bsize, - nb_full_mvs); - vp9_full_pixel_diamond_new( - cpi, x, &best_ref_mv1_full, step_param, lambda, 1, &cpi->fn_ptr[bsize], - nb_full_mvs, NB_MVS_NUM, &tpl_stats->mv_arr[rf_idx].as_mv, - &tpl_stats->mv_dist[rf_idx], &tpl_stats->mv_cost[rf_idx]); + vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx, + bsize, nb_full_mvs); + vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1, + &cpi->fn_ptr[bsize], nb_full_mvs, NB_MVS_NUM, mv, + mv_dist, mv_cost); #else (void)frame_idx; (void)mi_row; @@ -6002,7 +5987,8 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, #if CONFIG_NON_GREEDY_MV (void)td; - mv.as_int = tpl_stats->mv_arr[rf_idx].as_int; + mv.as_int = + get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_int; #else motion_compensated_prediction( cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset, @@ -6104,6 +6090,7 @@ static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx, set_mv_limits(cm, x, mi_row, mi_col); for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + int_mv *mv = get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col); if (ref_frame[rf_idx] == NULL) { tpl_stats->ready[rf_idx] = 0; continue; @@ -6113,7 +6100,8 @@ static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx, motion_compensated_prediction( cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset, ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize, - mi_row, mi_col, tpl_stats, rf_idx); + mi_row, mi_col, &mv->as_mv, rf_idx, &tpl_stats->mv_dist[rf_idx], + &tpl_stats->mv_cost[rf_idx]); } } @@ -6355,12 +6343,14 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; for (rf_idx = 0; rf_idx < 3; ++rf_idx) { #if RE_COMPUTE_MV_INCONSISTENCY + MV this_mv = + get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_mv; MV full_mv; int_mv nb_full_mvs[NB_MVS_NUM]; - prepare_nb_full_mvs(tpl_frame, mi_row, mi_col, rf_idx, bsize, - nb_full_mvs); - full_mv.row = this_tpl_stats->mv_arr[rf_idx].as_mv.row >> 3; - full_mv.col = this_tpl_stats->mv_arr[rf_idx].as_mv.col >> 3; + vp9_prepare_nb_full_mvs(tpl_frame, mi_row, mi_col, rf_idx, bsize, + nb_full_mvs); + full_mv.row = this_mv.row >> 3; + full_mv.col = this_mv.col >> 3; this_tpl_stats->mv_cost[rf_idx] = vp9_nb_mvs_inconsistency(&full_mv, nb_full_mvs, NB_MVS_NUM); #endif // RE_COMPUTE_MV_INCONSISTENCY @@ -6414,7 +6404,7 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames, if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) { const TplDepStats *tpl_ptr = &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; - int_mv mv = tpl_ptr->mv_arr[idx]; + int_mv mv = *get_pyramid_mv(tpl_frame, idx, bsize, mi_row, mi_col); printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row, mv.as_mv.col); } } diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 02814599d..5974750cf 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -297,11 +297,14 @@ typedef struct TplDepStats { int64_t inter_cost_arr[3]; int64_t recon_error_arr[3]; int64_t sse_arr[3]; - int_mv mv_arr[3]; double feature_score; #endif } TplDepStats; +#if CONFIG_NON_GREEDY_MV +#define SQUARE_BLOCK_SIZES 4 +#endif + typedef struct TplDepFrame { uint8_t is_valid; TplDepStats *tpl_stats_ptr; @@ -315,9 +318,36 @@ typedef struct TplDepFrame { double lambda; double mv_dist_sum[3]; double mv_cost_sum[3]; + int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES]; #endif } TplDepFrame; +#if CONFIG_NON_GREEDY_MV +static INLINE int get_square_block_idx(BLOCK_SIZE bsize) { + if (bsize == BLOCK_4X4) { + return 0; + } + if (bsize == BLOCK_8X8) { + return 1; + } + if (bsize == BLOCK_16X16) { + return 2; + } + if (bsize == BLOCK_32X32) { + return 3; + } + printf("ERROR: non-square block size\n"); + assert(0); + return -1; +} + +static INLINE int_mv *get_pyramid_mv(const TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + return &tpl_frame->pyramid_mv_arr[rf_idx][get_square_block_idx(bsize)] + [mi_row * tpl_frame->stride + mi_col]; +} +#endif + #define TPL_DEP_COST_SCALE_LOG2 4 // TODO(jingning) All spatially adaptive variables should go to TileDataEnc. diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 30fd842a1..03ac93463 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -2317,9 +2317,9 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, for (idx = 2; idx < MAX_ARF_LAYERS; ++idx) { if (arf_depth_boost[idx] == 0) break; - arf_depth_bits[idx] = - calculate_boost_bits(rc->baseline_gf_interval - total_arfs, - arf_depth_boost[idx], total_group_bits); + arf_depth_bits[idx] = calculate_boost_bits( + rc->baseline_gf_interval - total_arfs - arf_depth_count[idx], + arf_depth_boost[idx], total_group_bits); total_group_bits -= arf_depth_bits[idx]; total_arfs += arf_depth_count[idx]; @@ -2691,8 +2691,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } // Calculate the extra bits to be used for boosted frame(s) - gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, rc->gfu_boost, - gf_group_bits); + gf_arf_bits = calculate_boost_bits((rc->baseline_gf_interval - 1), + rc->gfu_boost, gf_group_bits); // Adjust KF group bits and error remaining. twopass->kf_group_error_left -= gf_group_err; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 316227e3c..5a6717ab2 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1879,6 +1879,34 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, } return bestsad; } + +void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row, + int mi_col, int rf_idx, BLOCK_SIZE bsize, + int_mv *nb_full_mvs) { + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; + int i; + for (i = 0; i < NB_MVS_NUM; ++i) { + int r = dirs[i][0] * mi_height; + int c = dirs[i][1] * mi_width; + if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 && + mi_col + c < tpl_frame->mi_cols) { + const TplDepStats *tpl_ptr = + &tpl_frame + ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c]; + int_mv *mv = + get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c); + if (tpl_ptr->ready[rf_idx]) { + nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv); + } else { + nb_full_mvs[i].as_int = INVALID_MV; + } + } else { + nb_full_mvs[i].as_int = INVALID_MV; + } + } +} #endif // CONFIG_NON_GREEDY_MV int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg, diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 6d89fdfdd..ab69afdcd 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -143,6 +143,11 @@ static INLINE MV get_full_mv(const MV *mv) { out_mv.col = mv->col >> 3; return out_mv; } + +struct TplDepFrame; +void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row, + int mi_col, int rf_idx, BLOCK_SIZE bsize, + int_mv *nb_full_mvs); #endif // CONFIG_NON_GREEDY_MV #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 4f60ea2f4..5ad68e2e5 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -417,6 +417,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { rc->rate_correction_factors[i] = 1.0; + rc->damped_adjustment[i] = 0; } rc->min_gf_interval = oxcf->min_gf_interval; @@ -720,6 +721,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) { int correction_factor = 100; double rate_correction_factor = get_rate_correction_factor(cpi); double adjustment_limit; + RATE_FACTOR_LEVEL rf_lvl = + cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; int projected_size_based_on_q = 0; @@ -746,10 +749,16 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) { correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) / projected_size_based_on_q); - // More heavily damped adjustment used if we have been oscillating either side - // of target. - adjustment_limit = - 0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor))); + // Do not use damped adjustment for the first frame of each frame type + if (!cpi->rc.damped_adjustment[rf_lvl]) { + adjustment_limit = 1.0; + cpi->rc.damped_adjustment[rf_lvl] = 1; + } else { + // More heavily damped adjustment used if we have been oscillating either + // side of target. + adjustment_limit = + 0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor))); + } cpi->rc.q_2_frame = cpi->rc.q_1_frame; cpi->rc.q_1_frame = cm->base_qindex; diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index a343bd34b..a5c1f4cf0 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -195,6 +195,8 @@ typedef struct { int use_post_encode_drop; // External flag to enable post encode frame dropping, controlled by user. int ext_use_post_encode_drop; + + int damped_adjustment[RATE_FACTOR_LEVELS]; } RATE_CONTROL; struct VP9_COMP; |