diff options
author | Yunqing Wang <yunqingwang@google.com> | 2017-04-24 12:06:49 -0700 |
---|---|---|
committer | Yunqing Wang <yunqingwang@google.com> | 2017-04-24 16:28:27 -0700 |
commit | 10a497bd3823d88cf4c2f816a648ca0e79bd07be (patch) | |
tree | 2d0fc0df7b30d58fb1b216717efbdaf3a83ad75d /vp9 | |
parent | c530208ae3cc0cb6835aeaa8e9ceb7edb37f40f0 (diff) | |
download | libvpx-10a497bd3823d88cf4c2f816a648ca0e79bd07be.tar libvpx-10a497bd3823d88cf4c2f816a648ca0e79bd07be.tar.gz libvpx-10a497bd3823d88cf4c2f816a648ca0e79bd07be.tar.bz2 libvpx-10a497bd3823d88cf4c2f816a648ca0e79bd07be.zip |
Make the row based multi-threaded encoder deterministic
This patch followed the allow_exhaustive_searches feature modification and
continued to modify the encoder to achieve determinism in row-based
multi-threaded encoding. When row-mt = 1 and multiple threads were used,
the adaptive feature in the encoder (adaptive_rd_thresh) was disabled, which
gave a BDRate gain (at speed 1, -0.6% ~ -0.7%; at speed 2, -0.46% ~ -0.59%),
but also some encoder speed losses (7% ~ 10% at speed 1 and 3% ~ 6% at
speed 2). These speed losses were acceptable considering the speed
gains obtained from row-mt.
Change-Id: I60d87a25346ebc487a864b57d559f560b7e398bb
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 18 | ||||
-rw-r--r-- | vp9/encoder/vp9_multi_thread.c | 16 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 22 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.c | 14 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.h | 23 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 31 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 10 | ||||
-rw-r--r-- | vp9/vp9_cx_iface.c | 12 |
11 files changed, 27 insertions, 131 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 12dc226da..e2adf731f 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -4341,7 +4341,6 @@ void vp9_init_tile_data(VP9_COMP *cpi) { } } #if CONFIG_MULTITHREAD - tile_data->enc_row_mt_mutex = NULL; tile_data->row_base_thresh_freq_fact = NULL; #endif } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index d82b706be..c26e5cf47 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -5281,4 +5281,9 @@ void vp9_set_row_mt(VP9_COMP *cpi) { if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) { cpi->row_mt = 1; } + + if (cpi->row_mt && cpi->oxcf.max_threads > 1) + cpi->row_mt_bit_exact = 1; + else + cpi->row_mt_bit_exact = 0; } diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 94ebc3b6f..d31b67138 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -268,7 +268,6 @@ typedef struct VP9EncoderConfig { VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; int row_mt; - unsigned int row_mt_bit_exact; unsigned int motion_vector_unit_test; } VP9EncoderConfig; @@ -286,9 +285,6 @@ typedef struct TileDataEnc { // Used for adaptive_rd_thresh with row multithreading int *row_base_thresh_freq_fact; -#if CONFIG_MULTITHREAD - pthread_mutex_t *enc_row_mt_mutex; -#endif } TileDataEnc; typedef struct RowMTInfo { @@ -692,7 +688,9 @@ typedef struct VP9_COMP { void (*row_mt_sync_read_ptr)(VP9RowMTSync *const, int, int); void (*row_mt_sync_write_ptr)(VP9RowMTSync *const, int, int, const int); ARNRFilterData arnr_filter_data; + int row_mt; + unsigned int row_mt_bit_exact; // Previous Partition Info BLOCK_SIZE *prev_partition; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index e7639a7c5..84a7ce6e1 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -979,12 +979,12 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData 
*td, if (log_intra < 10.0) { mb_intra_factor = 1.0 + ((10.0 - log_intra) * 0.05); fp_acc_data->intra_factor += mb_intra_factor; - if (cpi->oxcf.row_mt_bit_exact) + if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = mb_intra_factor; } else { fp_acc_data->intra_factor += 1.0; - if (cpi->oxcf.row_mt_bit_exact) + if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0; } @@ -999,12 +999,12 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, if ((level_sample < DARK_THRESH) && (log_intra < 9.0)) { mb_brightness_factor = 1.0 + (0.01 * (DARK_THRESH - level_sample)); fp_acc_data->brightness_factor += mb_brightness_factor; - if (cpi->oxcf.row_mt_bit_exact) + if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = mb_brightness_factor; } else { fp_acc_data->brightness_factor += 1.0; - if (cpi->oxcf.row_mt_bit_exact) + if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = 1.0; } @@ -1166,7 +1166,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, if (((this_error - intrapenalty) * 9 <= motion_error * 10) && (this_error < (2 * intrapenalty))) { fp_acc_data->neutral_count += 1.0; - if (cpi->oxcf.row_mt_bit_exact) + if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = 1.0; // Also track cases where the intra is not much worse than the inter @@ -1176,7 +1176,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, mb_neutral_count = (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error); fp_acc_data->neutral_count += mb_neutral_count; - if (cpi->oxcf.row_mt_bit_exact) + if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = mb_neutral_count; } @@ -1424,7 +1424,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { cm->log2_tile_rows = 0; - if 
(cpi->oxcf.row_mt_bit_exact && cpi->twopass.fp_mb_float_stats == NULL) + if (cpi->row_mt_bit_exact && cpi->twopass.fp_mb_float_stats == NULL) CHECK_MEM_ERROR( cm, cpi->twopass.fp_mb_float_stats, vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1)); @@ -1441,13 +1441,13 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { } else { cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read; cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write; - if (cpi->oxcf.row_mt_bit_exact) { + if (cpi->row_mt_bit_exact) { cm->log2_tile_cols = 0; vp9_zero_array(cpi->twopass.fp_mb_float_stats, cm->MBs); } vp9_encode_fp_row_mt(cpi); first_tile_col = &cpi->tile_data[0]; - if (cpi->oxcf.row_mt_bit_exact) + if (cpi->row_mt_bit_exact) accumulate_floating_point_stats(cpi, first_tile_col); first_pass_stat_calc(cpi, &fps, &(first_tile_col->fp_data)); } diff --git a/vp9/encoder/vp9_multi_thread.c b/vp9/encoder/vp9_multi_thread.c index 0ad5dcc48..da06fb151 100644 --- a/vp9/encoder/vp9_multi_thread.c +++ b/vp9/encoder/vp9_multi_thread.c @@ -110,19 +110,6 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) { multi_thread_ctxt->num_tile_vert_sbs[tile_row] = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); } - -#if CONFIG_MULTITHREAD - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - for (tile_col = 0; tile_col < tile_cols; tile_col++) { - TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; - - CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex, - vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex))); - - pthread_mutex_init(this_tile->enc_row_mt_mutex, NULL); - } - } -#endif } void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) { @@ -165,9 +152,6 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) { this_tile->row_base_thresh_freq_fact = NULL; } } - pthread_mutex_destroy(this_tile->enc_row_mt_mutex); - vpx_free(this_tile->enc_row_mt_mutex); - this_tile->enc_row_mt_mutex = NULL; } } #endif diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 
9c6138f6d..501cd32cf 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1720,15 +1720,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh, &rd_thresh_freq_fact[mode_index])) || (!cpi->sf.adaptive_rd_thresh_row_mt && - rd_less_than_thresh( - best_rdc.rdcost, mode_rd_thresh, -#if CONFIG_MULTITHREAD - // Synchronization of this function - // is only necessary when - // adaptive_rd_thresh is > 0. - cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL, -#endif - &rd_thresh_freq_fact[mode_index]))) + rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, + &rd_thresh_freq_fact[mode_index]))) continue; if (this_mode == NEWMV) { @@ -2098,15 +2091,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh, &rd_thresh_freq_fact[mode_index])) || (!cpi->sf.adaptive_rd_thresh_row_mt && - rd_less_than_thresh( - best_rdc.rdcost, mode_rd_thresh, -#if CONFIG_MULTITHREAD - // Synchronization of this function - // is only necessary when - // adaptive_rd_thresh is > 0. - cpi->sf.adaptive_rd_thresh ? 
tile_data->enc_row_mt_mutex : NULL, -#endif - &rd_thresh_freq_fact[mode_index]))) + rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, + &rd_thresh_freq_fact[mode_index]))) continue; mi->mode = this_mode; diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 3c49fe665..39a7742f0 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -650,15 +650,7 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { } void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh, - int bsize, -#if CONFIG_MULTITHREAD - pthread_mutex_t *enc_row_mt_mutex, -#endif - int best_mode_index) { -#if CONFIG_MULTITHREAD - if (NULL != enc_row_mt_mutex) pthread_mutex_lock(enc_row_mt_mutex); -#endif - + int bsize, int best_mode_index) { if (rd_thresh > 0) { const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES; int mode; @@ -676,10 +668,6 @@ void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh, } } } - -#if CONFIG_MULTITHREAD - if (NULL != enc_row_mt_mutex) pthread_mutex_unlock(enc_row_mt_mutex); -#endif } int vp9_get_intra_cost_penalty(int qindex, int qdelta, diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index aae47dcdd..1e1176866 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -170,32 +170,11 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi); void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi); void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize, -#if CONFIG_MULTITHREAD - pthread_mutex_t *enc_row_mt_mutex, -#endif int best_mode_index); static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, -#if CONFIG_MULTITHREAD - pthread_mutex_t *enc_row_mt_mutex, -#endif const int *const thresh_fact) { - int is_rd_less_than_thresh; - -#if CONFIG_MULTITHREAD - // Synchronize to ensure data coherency as thresh_freq_fact is maintained at - // tile level and not thread-safe with row based multi-threading - if (NULL != enc_row_mt_mutex) 
pthread_mutex_lock(enc_row_mt_mutex); -#endif - - is_rd_less_than_thresh = - best_rd < ((int64_t)thresh * (*thresh_fact) >> 5) || thresh == INT_MAX; - -#if CONFIG_MULTITHREAD - if (NULL != enc_row_mt_mutex) pthread_mutex_unlock(enc_row_mt_mutex); -#endif - - return is_rd_less_than_thresh; + return best_rd < ((int64_t)thresh * (*thresh_fact) >> 5) || thresh == INT_MAX; } static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 2278ddc0f..cf46905c9 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3161,11 +3161,6 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0; -#if CONFIG_MULTITHREAD - if (NULL != tile_data->enc_row_mt_mutex) - pthread_mutex_lock(tile_data->enc_row_mt_mutex); -#endif - for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i) mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5; @@ -3187,11 +3182,6 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, memcpy(mode_map, tile_mode_map, sizeof(mode_map)); -#if CONFIG_MULTITHREAD - if (NULL != tile_data->enc_row_mt_mutex) - pthread_mutex_unlock(tile_data->enc_row_mt_mutex); -#endif - for (midx = 0; midx < MAX_MODES; ++midx) { int mode_index = mode_map[midx]; int mode_excluded = 0; @@ -3628,11 +3618,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, if (!cpi->rc.is_src_frame_alt_ref) vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, - sf->adaptive_rd_thresh, bsize, -#if CONFIG_MULTITHREAD - tile_data->enc_row_mt_mutex, -#endif - best_mode_index); + sf->adaptive_rd_thresh, bsize, best_mode_index); // macroblock modes *mi = best_mbmode; @@ -3772,11 +3758,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data, (cm->interp_filter == mi->interp_filter)); vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, - 
cpi->sf.adaptive_rd_thresh, bsize, -#if CONFIG_MULTITHREAD - tile_data->enc_row_mt_mutex, -#endif - THR_ZEROMV); + cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV); vp9_zero(best_pred_diff); vp9_zero(best_filter_diff); @@ -3922,9 +3904,6 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, if (!internal_active_edge && rd_less_than_thresh(best_rd, rd_opt->threshes[segment_id][bsize][ref_index], -#if CONFIG_MULTITHREAD - tile_data->enc_row_mt_mutex, -#endif &rd_thresh_freq_fact[ref_index])) continue; @@ -4374,11 +4353,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, !is_inter_block(&best_mbmode)); vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh, - bsize, -#if CONFIG_MULTITHREAD - tile_data->enc_row_mt_mutex, -#endif - best_ref_index); + bsize, best_ref_index); // macroblock modes *mi = best_mbmode; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index fbf13969e..0c200f9be 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -639,10 +639,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { // With row based multi-threading, the following speed features // have to be disabled to guarantee that bitstreams encoded with single thread // and multiple threads match - if (cpi->oxcf.row_mt_bit_exact) { - sf->adaptive_rd_thresh = 0; - sf->adaptive_pred_interp_filter = 0; - } + if (cpi->row_mt_bit_exact) sf->adaptive_rd_thresh = 0; // This is only used in motion vector unit test. 
if (cpi->oxcf.motion_vector_unit_test == 1) @@ -796,10 +793,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { // With row based multi-threading, the following speed features // have to be disabled to guarantee that bitstreams encoded with single thread // and multiple threads match - if (cpi->oxcf.row_mt_bit_exact) { - sf->adaptive_rd_thresh = 0; - sf->adaptive_pred_interp_filter = 0; - } + if (cpi->row_mt_bit_exact) sf->adaptive_rd_thresh = 0; // This is only used in motion vector unit test. if (cpi->oxcf.motion_vector_unit_test == 1) diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index a335a4ab5..25fc80a9a 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -52,7 +52,6 @@ struct vp9_extracfg { int render_width; int render_height; unsigned int row_mt; - unsigned int row_mt_bit_exact; unsigned int motion_vector_unit_test; }; @@ -86,7 +85,6 @@ static struct vp9_extracfg default_extra_cfg = { 0, // render width 0, // render height 0, // row_mt - 0, // row_mt_bit_exact 0, // motion_vector_unit_test }; @@ -252,7 +250,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, "or kf_max_dist instead."); RANGE_CHECK(extra_cfg, row_mt, 0, 1); - RANGE_CHECK(extra_cfg, row_mt_bit_exact, 0, 1); RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2); RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2); RANGE_CHECK(extra_cfg, cpu_used, -8, 8); @@ -564,7 +561,6 @@ static vpx_codec_err_t set_encoder_config( oxcf->target_level = extra_cfg->target_level; oxcf->row_mt = extra_cfg->row_mt; - oxcf->row_mt_bit_exact = extra_cfg->row_mt_bit_exact; oxcf->motion_vector_unit_test = extra_cfg->motion_vector_unit_test; for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { @@ -862,13 +858,6 @@ static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx, return update_extra_cfg(ctx, &extra_cfg); } -static vpx_codec_err_t ctrl_enable_row_mt_bit_exact(vpx_codec_alg_priv_t *ctx, - va_list args) { - struct vp9_extracfg extra_cfg = ctx->extra_cfg; - 
extra_cfg.row_mt_bit_exact = CAST(VP9E_ENABLE_ROW_MT_BIT_EXACT, args); - return update_extra_cfg(ctx, &extra_cfg); -} - static vpx_codec_err_t ctrl_enable_motion_vector_unit_test( vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; @@ -1633,7 +1622,6 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP9E_SET_RENDER_SIZE, ctrl_set_render_size }, { VP9E_SET_TARGET_LEVEL, ctrl_set_target_level }, { VP9E_SET_ROW_MT, ctrl_set_row_mt }, - { VP9E_ENABLE_ROW_MT_BIT_EXACT, ctrl_enable_row_mt_bit_exact }, { VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test }, // Getters |