diff options
author | Yunqing Wang <yunqingwang@google.com> | 2023-04-24 21:15:14 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2023-04-24 21:15:14 +0000 |
commit | 52076a9c79b980180a86832acd20f17c1e583e3e (patch) | |
tree | cc0311095635f2ade1c2df2161e8cc23f0f249cf | |
parent | 24802201acd7dfa15928bcc47c1e270e7db5afac (diff) | |
parent | e7b58b69fd91a4288453c7c7003e1fc4cc48bb93 (diff) | |
download | libvpx-52076a9c79b980180a86832acd20f17c1e583e3e.tar libvpx-52076a9c79b980180a86832acd20f17c1e583e3e.tar.gz libvpx-52076a9c79b980180a86832acd20f17c1e583e3e.tar.bz2 libvpx-52076a9c79b980180a86832acd20f17c1e583e3e.zip |
Merge "Reduce joint motion search iters based on bsize" into main
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 40 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 19 |
3 files changed, 49 insertions, 17 deletions
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c68cfefde..f051c6279 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1898,11 +1898,22 @@ static INLINE int skip_single_mode_based_on_mode_rate( return 0; } -#define NUM_ITERS 4 +#define MAX_JOINT_MV_SEARCH_ITERS 4 +static INLINE int get_joint_search_iters(int sf_level, BLOCK_SIZE bsize) { + int num_iters = MAX_JOINT_MV_SEARCH_ITERS; // sf_level = 0 + if (sf_level >= 2) + num_iters = 0; + else if (sf_level >= 1) + num_iters = bsize < BLOCK_8X8 + ? 0 + : (bsize <= BLOCK_16X16 ? 2 : MAX_JOINT_MV_SEARCH_ITERS); + return num_iters; +} + static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], - int *rate_mv) { + int *rate_mv, int num_iters) { const VP9_COMMON *const cm = &cpi->common; const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; @@ -1911,7 +1922,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, const int refs[2] = { mi->ref_frame[0], mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] }; int_mv ref_mv[2]; - int_mv iter_mvs[NUM_ITERS][2]; + int_mv iter_mvs[MAX_JOINT_MV_SEARCH_ITERS][2]; int ite, ref; const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; struct scale_factors sf; @@ -1932,6 +1943,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]); #endif // CONFIG_VP9_HIGHBITDEPTH + // Check number of iterations do not exceed the max + assert(num_iters <= MAX_JOINT_MV_SEARCH_ITERS); + for (ref = 0; ref < 2; ++ref) { ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0]; @@ -1962,7 +1976,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, // Allow joint search multiple times iteratively for each reference frame // and break out of the search loop if it couldn't find a better mv. - for (ite = 0; ite < NUM_ITERS; ite++) { + for (ite = 0; ite < num_iters; ite++) { struct buf_2d ref_yv12[2]; uint32_t bestsme = UINT_MAX; int sadpb = x->sadperbit16; @@ -2044,7 +2058,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, } else { break; } - if (ite < NUM_ITERS - 1) { + if (ite < num_iters - 1) { iter_mvs[ite + 1][0].as_int = frame_mv[refs[0]].as_int; iter_mvs[ite + 1][1].as_int = frame_mv[refs[1]].as_int; } @@ -2250,12 +2264,16 @@ static int64_t rd_pick_best_sub8x8_mode( if (has_second_rf && this_mode == NEWMV && mi->interp_filter == EIGHTTAP) { + // Decide number of joint motion search iterations + const int num_joint_search_iters = get_joint_search_iters( + cpi->sf.comp_inter_joint_search_iter_level, bsize); // adjust src pointers mi_buf_shift(x, block); - if (sf->comp_inter_joint_search_thresh <= bsize) { + if (num_joint_search_iters) { int rate_mv; joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row, - mi_col, seg_mvs[block], &rate_mv); + mi_col, seg_mvs[block], &rate_mv, + num_joint_search_iters); seg_mvs[block][mi->ref_frame[0]].as_int = frame_mv[this_mode][mi->ref_frame[0]].as_int; seg_mvs[block][mi->ref_frame[1]].as_int = @@ -2878,16 +2896,20 @@ static int64_t handle_inter_mode( if (this_mode == NEWMV) { int rate_mv; if (is_comp_pred) { + // Decide number of joint motion search iterations + const int num_joint_search_iters = get_joint_search_iters( + cpi->sf.comp_inter_joint_search_iter_level, bsize); + // Initialize mv using single prediction mode result. frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; - if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { + if (num_joint_search_iters) { #if CONFIG_COLLECT_COMPONENT_TIMING start_timing(cpi, joint_motion_search_time); #endif joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, - single_newmv, &rate_mv); + single_newmv, &rate_mv, num_joint_search_iters); #if CONFIG_COLLECT_COMPONENT_TIMING end_timing(cpi, joint_motion_search_time); #endif diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 04804da1c..60720e3ea 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -244,6 +244,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->trellis_opt_tx_rd.thresh = boosted ? 4.0 : 3.0; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->comp_inter_joint_search_iter_level = 1; // Reference masking is not supported in dynamic scaling mode. sf->reference_masking = oxcf->resize_mode != RESIZE_DYNAMIC; @@ -331,7 +332,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR; sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->comp_inter_joint_search_iter_level = 2; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->recode_tolerance_high = 45; sf->enhanced_full_pixel_motion_search = 0; @@ -530,7 +531,7 @@ static void set_rt_speed_feature_framesize_independent( } sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->comp_inter_joint_search_iter_level = 2; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->adjust_partitioning_from_last_frame = 1; @@ -928,7 +929,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi, int speed) { sf->mv.auto_mv_step_size = 0; sf->mv.fullpel_search_step_param = 6; sf->mv.use_downsampled_sad = 0; - sf->comp_inter_joint_search_thresh = BLOCK_4X4; + sf->comp_inter_joint_search_iter_level = 0; sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; sf->adaptive_motion_search = 0; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 7cb3f3527..70c61fe00 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -286,11 +286,20 @@ typedef struct SPEED_FEATURES { // adds overhead. int static_segmentation; - // If 1 we iterate finding a best reference for 2 ref frames together - via - // a log search that iterates 4 times (check around mv for last for best - // error of combined predictor then check around mv for alt). If 0 we - // we just use the best motion vector found for each frame by itself. - BLOCK_SIZE comp_inter_joint_search_thresh; + // The best compound predictor is found using an iterative log search process + // that searches for best ref0 mv using error of combined predictor and then + // searches for best ref1 mv. This sf determines the number of iterations of + // this process based on block size. The sf becomes more aggressive from level + // 0 to 2. The following table indicates the number of iterations w.r.t bsize: + // ----------------------------------------------- + // |sf (level)|bsize < 8X8| [8X8, 16X16] | > 16X16 | + // | 0 | 4 | 4 | 4 | + // | 1 | 0 | 2 | 4 | + // | 2 | 0 | 0 | 0 | + // ----------------------------------------------- + // Here, 0 iterations indicate using the best single motion vector selected + // for each ref frame without any iterative refinement. + int comp_inter_joint_search_iter_level; // This variable is used to cap the maximum number of times we skip testing a // mode to be evaluated. A high value means we will be faster. |