summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYunqing Wang <yunqingwang@google.com>2023-04-24 21:15:14 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2023-04-24 21:15:14 +0000
commit52076a9c79b980180a86832acd20f17c1e583e3e (patch)
treecc0311095635f2ade1c2df2161e8cc23f0f249cf
parent24802201acd7dfa15928bcc47c1e270e7db5afac (diff)
parente7b58b69fd91a4288453c7c7003e1fc4cc48bb93 (diff)
downloadlibvpx-52076a9c79b980180a86832acd20f17c1e583e3e.tar
libvpx-52076a9c79b980180a86832acd20f17c1e583e3e.tar.gz
libvpx-52076a9c79b980180a86832acd20f17c1e583e3e.tar.bz2
libvpx-52076a9c79b980180a86832acd20f17c1e583e3e.zip
Merge "Reduce joint motion search iters based on bsize" into main
-rw-r--r--vp9/encoder/vp9_rdopt.c40
-rw-r--r--vp9/encoder/vp9_speed_features.c7
-rw-r--r--vp9/encoder/vp9_speed_features.h19
3 files changed, 49 insertions, 17 deletions
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index c68cfefde..f051c6279 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1898,11 +1898,22 @@ static INLINE int skip_single_mode_based_on_mode_rate(
return 0;
}
-#define NUM_ITERS 4
+#define MAX_JOINT_MV_SEARCH_ITERS 4
+static INLINE int get_joint_search_iters(int sf_level, BLOCK_SIZE bsize) {
+ int num_iters = MAX_JOINT_MV_SEARCH_ITERS; // default (sf_level < 1): max
+ if (sf_level >= 2) // level 2 and above: zero iterations for every bsize
+ num_iters = 0;
+ else if (sf_level >= 1) // level 1: iteration count depends on bsize
+ num_iters = bsize < BLOCK_8X8
+ ? 0
+ : (bsize <= BLOCK_16X16 ? 2 : MAX_JOINT_MV_SEARCH_ITERS);
+ return num_iters; // callers skip joint_motion_search() when this is 0
+}
+
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int_mv *frame_mv, int mi_row, int mi_col,
int_mv single_newmv[MAX_REF_FRAMES],
- int *rate_mv) {
+ int *rate_mv, int num_iters) {
const VP9_COMMON *const cm = &cpi->common;
const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
@@ -1911,7 +1922,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
const int refs[2] = { mi->ref_frame[0],
mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] };
int_mv ref_mv[2];
- int_mv iter_mvs[NUM_ITERS][2];
+ int_mv iter_mvs[MAX_JOINT_MV_SEARCH_ITERS][2];
int ite, ref;
const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
struct scale_factors sf;
@@ -1932,6 +1943,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
#endif // CONFIG_VP9_HIGHBITDEPTH
+ // Check that the number of iterations does not exceed the max
+ assert(num_iters <= MAX_JOINT_MV_SEARCH_ITERS);
+
for (ref = 0; ref < 2; ++ref) {
ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
@@ -1962,7 +1976,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
// Allow joint search multiple times iteratively for each reference frame
// and break out of the search loop if it couldn't find a better mv.
- for (ite = 0; ite < NUM_ITERS; ite++) {
+ for (ite = 0; ite < num_iters; ite++) {
struct buf_2d ref_yv12[2];
uint32_t bestsme = UINT_MAX;
int sadpb = x->sadperbit16;
@@ -2044,7 +2058,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
} else {
break;
}
- if (ite < NUM_ITERS - 1) {
+ if (ite < num_iters - 1) {
iter_mvs[ite + 1][0].as_int = frame_mv[refs[0]].as_int;
iter_mvs[ite + 1][1].as_int = frame_mv[refs[1]].as_int;
}
@@ -2250,12 +2264,16 @@ static int64_t rd_pick_best_sub8x8_mode(
if (has_second_rf && this_mode == NEWMV &&
mi->interp_filter == EIGHTTAP) {
+ // Decide number of joint motion search iterations
+ const int num_joint_search_iters = get_joint_search_iters(
+ cpi->sf.comp_inter_joint_search_iter_level, bsize);
// adjust src pointers
mi_buf_shift(x, block);
- if (sf->comp_inter_joint_search_thresh <= bsize) {
+ if (num_joint_search_iters) {
int rate_mv;
joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
- mi_col, seg_mvs[block], &rate_mv);
+ mi_col, seg_mvs[block], &rate_mv,
+ num_joint_search_iters);
seg_mvs[block][mi->ref_frame[0]].as_int =
frame_mv[this_mode][mi->ref_frame[0]].as_int;
seg_mvs[block][mi->ref_frame[1]].as_int =
@@ -2878,16 +2896,20 @@ static int64_t handle_inter_mode(
if (this_mode == NEWMV) {
int rate_mv;
if (is_comp_pred) {
+ // Decide number of joint motion search iterations
+ const int num_joint_search_iters = get_joint_search_iters(
+ cpi->sf.comp_inter_joint_search_iter_level, bsize);
+
// Initialize mv using single prediction mode result.
frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
- if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
+ if (num_joint_search_iters) {
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, joint_motion_search_time);
#endif
joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col,
- single_newmv, &rate_mv);
+ single_newmv, &rate_mv, num_joint_search_iters);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, joint_motion_search_time);
#endif
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 04804da1c..60720e3ea 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -244,6 +244,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->trellis_opt_tx_rd.thresh = boosted ? 4.0 : 3.0;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ sf->comp_inter_joint_search_iter_level = 1;
// Reference masking is not supported in dynamic scaling mode.
sf->reference_masking = oxcf->resize_mode != RESIZE_DYNAMIC;
@@ -331,7 +332,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
: FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR;
sf->disable_filter_search_var_thresh = 100;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+ sf->comp_inter_joint_search_iter_level = 2;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->recode_tolerance_high = 45;
sf->enhanced_full_pixel_motion_search = 0;
@@ -530,7 +531,7 @@ static void set_rt_speed_feature_framesize_independent(
}
sf->disable_filter_search_var_thresh = 50;
- sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
+ sf->comp_inter_joint_search_iter_level = 2;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->adjust_partitioning_from_last_frame = 1;
@@ -928,7 +929,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi, int speed) {
sf->mv.auto_mv_step_size = 0;
sf->mv.fullpel_search_step_param = 6;
sf->mv.use_downsampled_sad = 0;
- sf->comp_inter_joint_search_thresh = BLOCK_4X4;
+ sf->comp_inter_joint_search_iter_level = 0;
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 7cb3f3527..70c61fe00 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -286,11 +286,20 @@ typedef struct SPEED_FEATURES {
// adds overhead.
int static_segmentation;
- // If 1 we iterate finding a best reference for 2 ref frames together - via
- // a log search that iterates 4 times (check around mv for last for best
- // error of combined predictor then check around mv for alt). If 0 we
- // we just use the best motion vector found for each frame by itself.
- BLOCK_SIZE comp_inter_joint_search_thresh;
+ // The best compound predictor is found using an iterative log search process
+ // that searches for best ref0 mv using error of combined predictor and then
+ // searches for best ref1 mv. This sf determines the number of iterations of
+ // this process based on block size. The sf becomes more aggressive from level
+ // 0 to 2. The following table indicates the number of iterations w.r.t bsize:
+ // -----------------------------------------------
+ // |sf (level)|bsize < 8X8| [8X8, 16X16] | > 16X16 |
+ // | 0 | 4 | 4 | 4 |
+ // | 1 | 0 | 2 | 4 |
+ // | 2 | 0 | 0 | 0 |
+ // -----------------------------------------------
+ // Here, 0 iterations indicate using the best single motion vector selected
+ // for each ref frame without any iterative refinement.
+ int comp_inter_joint_search_iter_level;
// This variable is used to cap the maximum number of times we skip testing a
// mode to be evaluated. A high value means we will be faster.