diff options
author | Hui Su <huisu@google.com> | 2018-07-20 15:29:14 -0700 |
---|---|---|
committer | Hui Su <huisu@google.com> | 2018-07-23 10:22:32 -0700 |
commit | b54cdcc3de62390dc438a36425665a89958aefea (patch) | |
tree | 21a801b2b62cf84207bab351cca3322e6197de5b | |
parent | e858863dda2e242ede57916dae4086a991f618dd (diff) | |
download | libvpx-b54cdcc3de62390dc438a36425665a89958aefea.tar libvpx-b54cdcc3de62390dc438a36425665a89958aefea.tar.gz libvpx-b54cdcc3de62390dc438a36425665a89958aefea.tar.bz2 libvpx-b54cdcc3de62390dc438a36425665a89958aefea.zip |
Add prune_ref_frame_for_rect_partitions feature
Add a speed feature to prune reference frames for rectangular
partitions. Rectangular partition RD search happens after square
partition RD search. With this feature, we keep record of the ref
frames picked by square partitions, and only consider those ref
frames during rect partition RD search.
With this feature on, the computation cost of rect partition RD
search is greatly reduced, so we can afford to skip rect partition
RD search less aggressively.
Overall, both compression and encoding speed are improved. Only
speed 0 is affected.
Coding gains:
lowres midres hdres
ovr psnr 0.00% -0.36% -0.37%
avg psnr 0.00% -0.36% -0.36%
Tested encoding speed with QP=40 on about 30 sequences.
Speed gains:
lowres midres hdres
average 13.4% 7.1% 6.1%
max 28.0% 12.0% 9.8%
Change-Id: Id5f36dd2ac75028ae98550d67b0a524aa251b692
-rw-r--r-- | vp9/encoder/vp9_context_tree.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 53 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 15 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 3 |
5 files changed, 73 insertions, 9 deletions
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h index 73423c075..2bcc26e94 100644 --- a/vp9/encoder/vp9_context_tree.h +++ b/vp9/encoder/vp9_context_tree.h @@ -75,6 +75,8 @@ typedef struct { // Used for the machine learning-based early termination int32_t sum_y_eobs; + // Skip certain ref frames during RD search of rectangular partitions. + uint8_t skip_ref_frame_mask; } PICK_MODE_CONTEXT; typedef struct PC_TREE { diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 8f622d0ed..f3a4ae7fe 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2633,6 +2633,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, ctx, INT64_MAX); break; case PARTITION_HORZ: + pc_tree->horizontal[0].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, &pc_tree->horizontal[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && @@ -2642,6 +2643,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, vp9_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); + pc_tree->horizontal[1].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { @@ -2654,6 +2656,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, } break; case PARTITION_VERT: + pc_tree->vertical[0].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, &pc_tree->vertical[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && @@ -2663,6 +2666,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, vp9_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); + pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); @@ -3712,10 +3716,12 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist; int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate; int must_split = 0; - int partition_mul = cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ ? x->cb_rdmult : cpi->rd.RDMULT; + // Ref frames picked in the [i_th] quarter subblock during square partition + // RD search. It may be used to prune ref frame selection of rect partitions. + uint8_t ref_frames_used[4] = { 0, 0, 0, 0 }; (void)*tp_orig; @@ -3846,6 +3852,14 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx, best_rdc.rdcost); if (this_rdc.rate != INT_MAX) { + if (cpi->sf.prune_ref_frame_for_rect_partitions) { + const int ref1 = ctx->mic.ref_frame[0]; + const int ref2 = ctx->mic.ref_frame[1]; + for (i = 0; i < 4; ++i) { + ref_frames_used[i] |= (1 << ref1); + if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); + } + } if (bsize >= BLOCK_8X8) { this_rdc.rdcost += RDCOST(partition_mul, x->rddiv, cpi->partition_cost[pl][PARTITION_NONE], 0); @@ -3970,8 +3984,18 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, pc_tree->leaf_split[0], best_rdc.rdcost); - - if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX; + if (sum_rdc.rate == INT_MAX) { + sum_rdc.rdcost = INT64_MAX; + } else { + if (cpi->sf.prune_ref_frame_for_rect_partitions) { + const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0]; + const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1]; + for (i = 0; i < 4; ++i) { + ref_frames_used[i] |= (1 << ref1); + if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); + } + } + } } else { for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split); ++i) { @@ -3999,6 +4023,13 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, sum_rdc.rdcost = INT64_MAX; break; } else { + if (cpi->sf.prune_ref_frame_for_rect_partitions && + pc_tree->split[i]->none.rate != INT_MAX) { + const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0]; + const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1]; + ref_frames_used[i] |= (1 << ref1); + if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); + } sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost += this_rdc.rdcost; @@ -4036,6 +4067,22 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } + pc_tree->horizontal[0].skip_ref_frame_mask = 0; + pc_tree->horizontal[1].skip_ref_frame_mask = 0; + pc_tree->vertical[0].skip_ref_frame_mask = 0; + pc_tree->vertical[1].skip_ref_frame_mask = 0; + if (cpi->sf.prune_ref_frame_for_rect_partitions) { + uint8_t used_frames; + used_frames = ref_frames_used[0] | ref_frames_used[1]; + if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames; + used_frames = ref_frames_used[2] | ref_frames_used[3]; + if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames; + used_frames = ref_frames_used[0] | ref_frames_used[2]; + if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames; + used_frames = ref_frames_used[1] | ref_frames_used[3]; + if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames; + } + // PARTITION_HORZ if (partition_horz_allowed && (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e3672edf5..ad4b48a9c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3073,6 +3073,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, // lock mechanism involved with reads from // tile_mode_map const int mode_search_skip_flags = sf->mode_search_skip_flags; + const int is_rect_partition = + num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize]; int64_t mask_filter = 0; int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; @@ -3224,6 +3226,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, vp9_zero(x->sum_y_eobs); + if (is_rect_partition) { + if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue; + if (second_ref_frame > 0 && + (ctx->skip_ref_frame_mask & (1 << second_ref_frame))) + continue; + } + // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. if (midx == mode_skip_start && best_mode_index >= 0) { diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 80adf845e..75a8de270 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -70,11 +70,14 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, // speed 0 features sf->partition_search_breakout_thr.dist = (1 << 20); sf->partition_search_breakout_thr.rate = 80; + sf->use_square_only_threshold = BLOCK_SIZES; - // Currently, the machine-learning based partition search early termination - // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. if (is_480p_or_larger) { + // Currently, the machine-learning based partition search early termination + // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. sf->ml_partition_search_early_termination = 1; + } else { + sf->use_square_only_threshold = BLOCK_32X32; } if (!is_1080p_or_larger) { @@ -92,6 +95,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, if (speed >= 1) { sf->ml_partition_search_early_termination = 0; + sf->use_square_only_threshold = BLOCK_4X4; if (is_720p_or_larger) { sf->disable_split_mask = @@ -193,7 +197,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->allow_skip_recode = 1; sf->less_rectangular_check = 1; sf->use_square_partition_only = !frame_is_boosted(cpi); - sf->use_square_only_threshold = BLOCK_16X16; + sf->prune_ref_frame_for_rect_partitions = 1; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { sf->exhaustive_searches_thresh = (1 << 22); @@ -210,6 +214,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, if (speed >= 1) { sf->enable_tpl_model = 0; + sf->prune_ref_frame_for_rect_partitions = 0; if (oxcf->pass == 2) { TWO_PASS *const twopass = &cpi->twopass; if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) || @@ -226,10 +231,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5]; sf->allow_quant_coeff_opt = sf->optimize_coefficients; sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? speed : 5]; - - sf->use_square_only_threshold = BLOCK_4X4; sf->less_rectangular_check = 1; - sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; sf->mv.auto_mv_step_size = 1; @@ -848,6 +850,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { #else sf->enable_tpl_model = 1; #endif + sf->prune_ref_frame_for_rect_partitions = 0; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_ALL; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 7a591e491..fd4973fb2 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -319,6 +319,9 @@ typedef struct SPEED_FEATURES { int use_square_partition_only; BLOCK_SIZE use_square_only_threshold; + // Prune reference frames for rectangular partitions. + int prune_ref_frame_for_rect_partitions; + // Sets min and max partition sizes for this 64x64 region based on the // same 64x64 in last encoded frame, and the left and above neighbor. AUTO_MIN_MAX_MODE auto_min_max_partition_size; |