diff options
Diffstat (limited to 'vp9/encoder/vp9_encodeframe.c')
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 99 |
1 file changed, 62 insertions(+), 37 deletions(-)
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 5adefac1a..236567f94 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3440,18 +3440,59 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, #undef FEATURES #undef LABELS +// Perform fast and coarse motion search for the given block. This is a +// pre-processing step for the ML based partition search speedup. +static void simple_motion_search(const VP9_COMP *const cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, int mi_row, int mi_col, + MV ref_mv, MV_REFERENCE_FRAME ref, + uint8_t *const pred_buf) { + const VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mi = xd->mi[0]; + const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_buffer(cpi, ref); + const int step_param = 1; + const MvLimits tmp_mv_limits = x->mv_limits; + const SEARCH_METHODS search_method = NSTEP; + const int sadpb = x->sadperbit16; + MV ref_mv_full = { ref_mv.row >> 3, ref_mv.col >> 3 }; + MV best_mv = { 0, 0 }; + int cost_list[5]; + + assert(yv12 != NULL); + if (!yv12) return; + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, + &cm->frame_refs[ref - 1].sf); + mi->ref_frame[0] = ref; + mi->ref_frame[1] = NONE; + mi->sb_type = bsize; + vp9_set_mv_search_range(&x->mv_limits, &ref_mv); + vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_method, + sadpb, cond_cost_list(cpi, cost_list), &ref_mv, + &best_mv, 0, 0); + best_mv.row *= 8; + best_mv.col *= 8; + x->mv_limits = tmp_mv_limits; + mi->mv[0].as_mv = best_mv; + + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); + xd->plane[0].dst.buf = pred_buf; + xd->plane[0].dst.stride = 64; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); +} + // Use a neural net model to prune partition-none and partition-split search. // The model uses prediction residue variance and quantization step size as // input features. 
#define FEATURES 6 -static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, +static void ml_predict_var_rd_paritioning(const VP9_COMP *const cpi, + MACROBLOCK *const x, + PC_TREE *const pc_tree, BLOCK_SIZE bsize, int mi_row, int mi_col, int *none, int *split) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi[0]; + const VP9_COMMON *const cm = &cpi->common; const NN_CONFIG *nn_config = NULL; #if CONFIG_VP9_HIGHBITDEPTH + MACROBLOCKD *xd = &x->e_mbd; DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? (CONVERT_TO_BYTEPTR(pred_buffer)) @@ -3489,41 +3530,20 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, if (!nn_config) return; - mi->ref_frame[1] = NONE; - mi->sb_type = bsize; // Do a simple single motion search to find a prediction for current block. // The variance of the residue will be used as input features. { + MV ref_mv; const MV_REFERENCE_FRAME ref = cpi->rc.is_src_frame_alt_ref ? 
ALTREF_FRAME : LAST_FRAME; - YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref); - MV ref_mv = { 0, 0 }; - MV ref_mv_full = { 0, 0 }; - const int step_param = 1; - const MvLimits tmp_mv_limits = x->mv_limits; - const SEARCH_METHODS search_method = NSTEP; - const int sadpb = x->sadperbit16; - MV best_mv = { 0, 0 }; - int cost_list[5]; - - assert(yv12 != NULL); - if (!yv12) return; - vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, - &cm->frame_refs[ref - 1].sf); - mi->ref_frame[0] = ref; - vp9_set_mv_search_range(&x->mv_limits, &ref_mv); - vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, - search_method, sadpb, cond_cost_list(cpi, cost_list), - &ref_mv, &best_mv, 0, 0); - best_mv.row *= 8; - best_mv.col *= 8; - x->mv_limits = tmp_mv_limits; - mi->mv[0].as_mv = best_mv; - - set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); - xd->plane[0].dst.buf = pred_buf; - xd->plane[0].dst.stride = 64; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + // If bsize is 64x64, use zero MV as reference; otherwise, use MV result + // of previous(larger) block as reference. 
+ if (bsize == BLOCK_64X64) + ref_mv.row = ref_mv.col = 0; + else + ref_mv = pc_tree->mv; + simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf); + pc_tree->mv = x->e_mbd.mi[0]->mv[0].as_mv; } vpx_clear_system_state(); @@ -3818,14 +3838,19 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->partitioning = PARTITION_NONE; - if (cpi->sf.ml_var_partition_pruning) { + if (cpi->sf.ml_var_partition_pruning && !frame_is_intra_only(cm)) { const int do_ml_var_partition_pruning = - !frame_is_intra_only(cm) && partition_none_allowed && do_split && + partition_none_allowed && do_split && mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows && mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols; if (do_ml_var_partition_pruning) { - ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col, + ml_predict_var_rd_paritioning(cpi, x, pc_tree, bsize, mi_row, mi_col, &partition_none_allowed, &do_split); + } else { + vp9_zero(pc_tree->mv); + } + if (bsize > BLOCK_8X8) { // Store MV result as reference for subblocks. + for (i = 0; i < 4; ++i) pc_tree->split[i]->mv = pc_tree->mv; } } |