diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 79 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_mbgraph.c | 21 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 561 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 142 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 83 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 104 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.h | 13 |
9 files changed, 561 insertions, 446 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index d17952487..6894f553f 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1923,9 +1923,6 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, vp9_zero(cpi->mb.pred_mv); - if (cpi->sf.reference_masking) - rd_pick_reference_frame(cpi, tile, mi_row, mi_col); - if (cpi->sf.use_lastframe_partitioning || cpi->sf.use_one_partition_size_always ) { const int idx_str = cm->mode_info_stride * mi_row + mi_col; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index c51ce9f54..500255748 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -106,6 +106,7 @@ static int lookup_next_frame_stats(const struct twopass_rc *p, return 1; } + // Read frame stats at an offset from the current position static int read_frame_stats(const struct twopass_rc *p, FIRSTPASS_STATS *frame_stats, int offset) { @@ -149,7 +150,7 @@ static void output_stats(const VP9_COMP *cpi, FILE *fpfile; fpfile = fopen("firstpass.stt", "a"); - fprintf(stdout, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f" + fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f" "%12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f" "%12.0f %12.0f %12.4f %12.0f %12.0f %12.4f\n", stats->frame, @@ -349,17 +350,14 @@ static double simple_weight(YV12_BUFFER_CONFIG *source) { } -// This function returns the current per frame maximum bitrate target. +// This function returns the maximum target rate per frame. static int frame_max_bits(VP9_COMP *cpi) { - // Max allocation for a single frame based on the max section guidelines - // passed in and how many bits are left. - // For VBR base this on the bits and frames left plus the - // two_pass_vbrmax_section rate passed in by the user. 
- const double max_bits = (1.0 * cpi->twopass.bits_left / - (cpi->twopass.total_stats.count - cpi->common.current_video_frame)) * - (cpi->oxcf.two_pass_vbrmax_section / 100.0); + int64_t max_bits = + ((int64_t)cpi->rc.av_per_frame_bandwidth * + (int64_t)cpi->oxcf.two_pass_vbrmax_section) / 100; + if (max_bits < 0) - return 0; + return 0; if (max_bits >= INT_MAX) return INT_MAX; return (int)max_bits; @@ -716,7 +714,7 @@ void vp9_first_pass(VP9_COMP *cpi) { mv.as_mv.row *= 8; mv.as_mv.col *= 8; this_error = motion_error; - vp9_set_mbmode_and_mvs(x, NEWMV, &mv); + vp9_set_mbmode_and_mvs(xd, NEWMV, &mv.as_mv); xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE; @@ -1662,7 +1660,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Don't allow a gf too near the next kf if ((cpi->rc.frames_to_key - i) < MIN_GF_INTERVAL) { - while (i < cpi->rc.frames_to_key) { + while (i < (cpi->rc.frames_to_key + !cpi->rc.next_key_frame_forced)) { i++; if (EOF == input_stats(&cpi->twopass, this_frame)) @@ -1697,6 +1695,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) && (i >= MIN_GF_INTERVAL) && + // for real scene cuts (not forced kfs) dont allow arf very near kf. + (cpi->rc.next_key_frame_forced || + (i <= (cpi->rc.frames_to_key - MIN_GF_INTERVAL))) && ((next_frame.pcnt_inter > 0.75) || (next_frame.pcnt_second_ref > 0.5)) && ((mv_in_out_accumulator / (double)i > -0.2) || @@ -1765,18 +1766,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { #endif #endif - // Now decide how many bits should be allocated to the GF group as a - // proportion of those remaining in the kf group. - // The final key frame group in the clip is treated as a special case - // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. 
- // This is also important for short clips where there may only be one - // key frame. - if (cpi->rc.frames_to_key >= (int)(cpi->twopass.total_stats.count - - cpi->common.current_video_frame)) { - cpi->twopass.kf_group_bits = - (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; - } - // Calculate the bits to be allocated to the group as a whole if ((cpi->twopass.kf_group_bits > 0) && (cpi->twopass.kf_group_error_left > 0)) { @@ -1836,7 +1825,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // If the frame that is to be boosted is simpler than the average for // the gf/arf group then use an alternative calculation // based on the error score of the frame itself - if (mod_frame_err < gf_group_err / (double)cpi->rc.baseline_gf_interval) { + if (cpi->rc.baseline_gf_interval < 1 || + mod_frame_err < gf_group_err / (double)cpi->rc.baseline_gf_interval) { double alt_gf_grp_bits = (double)cpi->twopass.kf_group_bits * (mod_frame_err * (double)cpi->rc.baseline_gf_interval) / @@ -1863,9 +1853,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (gf_bits < 0) gf_bits = 0; - // Add in minimum for a frame - gf_bits += cpi->rc.min_frame_bandwidth; - if (i == 0) { cpi->twopass.gf_bits = gf_bits; } @@ -1899,8 +1886,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->twopass.gf_group_error_left = (int64_t)gf_group_err; } - cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - - cpi->rc.min_frame_bandwidth; + cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; @@ -1985,9 +1971,6 @@ static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; - // Add in the minimum number of bits that is set aside for every frame. - target_frame_size += cpi->rc.min_frame_bandwidth; - // Per frame bit target for this frame. 
cpi->rc.per_frame_bandwidth = target_frame_size; } @@ -2029,6 +2012,22 @@ void vp9_get_one_pass_params(VP9_COMP *cpi) { } } +void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if ((cm->current_video_frame == 0 || + cm->frame_flags & FRAMEFLAGS_KEY || + cpi->rc.frames_to_key == 0 || + (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { + cm->frame_type = KEY_FRAME; + cpi->rc.frames_to_key = cpi->key_frame_frequency; + } else { + cm->frame_type = INTER_FRAME; + } + // Don't use gf_update by default in CBR mode. + cpi->rc.frames_till_gf_update_due = INT_MAX; + cpi->rc.baseline_gf_interval = INT_MAX; +} + void vp9_get_first_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (!cpi->refresh_alt_ref_frame && @@ -2265,8 +2264,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { vp9_zero(next_frame); vp9_clear_system_state(); // __asm emms; - start_position = cpi->twopass.stats_in; + start_position = cpi->twopass.stats_in; cpi->common.frame_type = KEY_FRAME; // is this a forced key frame by interval @@ -2348,7 +2347,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // interval is between 1x and 2x if (cpi->oxcf.auto_key && cpi->rc.frames_to_key > (int)cpi->key_frame_frequency) { - FIRSTPASS_STATS *current_pos = cpi->twopass.stats_in; FIRSTPASS_STATS tmp_frame; cpi->rc.frames_to_key /= 2; @@ -2373,15 +2371,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Load a the next frame's stats input_stats(&cpi->twopass, &tmp_frame); } - - // Reset to the start of the group - reset_fpf_position(&cpi->twopass, current_pos); - + cpi->rc.next_key_frame_forced = 1; + } else if (cpi->twopass.stats_in == cpi->twopass.stats_in_end) { cpi->rc.next_key_frame_forced = 1; } else { cpi->rc.next_key_frame_forced = 0; } - // Special case for the last frame of the file + + // Special case for the last key frame of the file if 
(cpi->twopass.stats_in >= cpi->twopass.stats_in_end) { // Accumulate kf group error kf_group_err += calculate_modified_err(cpi, this_frame); @@ -2566,8 +2563,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits; - // Add in the minimum frame allowance - cpi->twopass.kf_bits += cpi->rc.min_frame_bandwidth; // Peer frame bit target for this frame cpi->rc.per_frame_bandwidth = cpi->twopass.kf_bits; diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 43703c2c5..f89e4cb1c 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -22,6 +22,7 @@ void vp9_end_second_pass(VP9_COMP *cpi); void vp9_get_first_pass_params(VP9_COMP *cpi); void vp9_get_one_pass_params(VP9_COMP *cpi); +void vp9_get_one_pass_cbr_params(VP9_COMP *cpi); void vp9_get_svc_params(VP9_COMP *cpi); #endif // VP9_ENCODER_VP9_FIRSTPASS_H_ diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index f3ddd39b6..c50098678 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -23,7 +23,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, - int_mv *dst_mv, + MV *dst_mv, int mb_row, int mb_col) { MACROBLOCK *const x = &cpi->mb; @@ -35,7 +35,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const int tmp_col_max = x->mv_col_max; const int tmp_row_min = x->mv_row_min; const int tmp_row_max = x->mv_row_max; - int_mv ref_full; + MV ref_full; // Further step/diamond searches as necessary int step_param = cpi->sf.reduce_first_step_size + @@ -44,12 +44,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, vp9_set_mv_search_range(x, ref_mv); - ref_full.as_mv.col = ref_mv->col >> 3; - ref_full.as_mv.row = ref_mv->row >> 3; + ref_full.col = ref_mv->col >> 3; + ref_full.row = ref_mv->row >> 3; /*cpi->sf.search_method == HEX*/ - best_err = vp9_hex_search(x, &ref_full.as_mv, step_param, x->errorperbit, - 0, 
&v_fn_ptr, 0, ref_mv, &dst_mv->as_mv); + best_err = vp9_hex_search(x, &ref_full, step_param, x->errorperbit, + 0, &v_fn_ptr, 0, ref_mv, dst_mv); // Try sub-pixel MC // if (bestsme > error_thresh && bestsme < INT_MAX) @@ -57,15 +57,14 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, int distortion; unsigned int sse; best_err = cpi->find_fractional_mv_step( - x, - &dst_mv->as_mv, ref_mv, + x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0, cpi->sf.subpel_iters_per_step, NULL, NULL, & distortion, &sse); } - vp9_set_mbmode_and_mvs(x, NEWMV, dst_mv); + vp9_set_mbmode_and_mvs(xd, NEWMV, dst_mv); vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); best_err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, @@ -96,7 +95,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv, // Test last reference frame using the previous best mv as the // starting point (best reference) for the search - tmp_err = do_16x16_motion_iteration(cpi, &ref_mv->as_mv, &tmp_mv, + tmp_err = do_16x16_motion_iteration(cpi, &ref_mv->as_mv, &tmp_mv.as_mv, mb_row, mb_col); if (tmp_err < err) { err = tmp_err; @@ -110,7 +109,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv, int_mv zero_ref_mv, tmp_mv; zero_ref_mv.as_int = 0; - tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv.as_mv, &tmp_mv, + tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv.as_mv, &tmp_mv.as_mv, mb_row, mb_col); if (tmp_err < err) { dst_mv->as_int = tmp_mv.as_int; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 7a1f5c1a9..291a55ecb 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -581,6 +581,177 @@ static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi, int mode) { sf->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; } +static void set_good_speed_feature(VP9_COMMON *cm, + SPEED_FEATURES *sf, + int speed) { + int i; + 
sf->adaptive_rd_thresh = 1; + sf->recode_loop = (speed < 1); + if (speed == 1) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) + ? USE_FULL_RD : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? + DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 1; + sf->auto_mv_step_size = 1; + sf->adaptive_rd_thresh = 2; + sf->recode_loop = 2; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } + if (speed == 2) { + sf->use_square_partition_only = !frame_is_intra_only(cm); + sf->less_rectangular_check = 1; + sf->tx_size_search_method = frame_is_intra_only(cm) + ? USE_FULL_RD : USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = cm->show_frame ? 
+ DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + else + sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 2; + sf->reference_masking = 1; + sf->auto_mv_step_size = 1; + + sf->disable_filter_search_var_thresh = 50; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = 1; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->adaptive_rd_thresh = 2; + sf->recode_loop = 2; + sf->use_lp32x32fdct = 1; + sf->mode_skip_start = 11; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + } + if (speed == 3) { + sf->use_square_partition_only = 1; + sf->tx_size_search_method = USE_LARGESTALL; + + if (MIN(cm->width, cm->height) >= 720) + sf->disable_split_mask = DISABLE_ALL_SPLIT; + else + sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_INTRA_LOWVAR; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 2; + sf->reference_masking = 1; + sf->auto_mv_step_size = 1; + + sf->disable_filter_search_var_thresh = 100; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = 1; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->use_lp32x32fdct = 1; + sf->subpel_iters_per_step = 
1; + sf->use_fast_coef_updates = 2; + + sf->adaptive_rd_thresh = 4; + sf->mode_skip_start = 6; + } + if (speed == 4) { + sf->use_square_partition_only = 1; + sf->tx_size_search_method = USE_LARGESTALL; + sf->disable_split_mask = DISABLE_ALL_SPLIT; + + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_COMP_REFMISMATCH | + FLAG_SKIP_INTRA_LOWVAR | + FLAG_EARLY_TERMINATE; + + sf->use_rd_breakout = 1; + sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 2; + sf->reference_masking = 1; + sf->auto_mv_step_size = 1; + + sf->disable_filter_search_var_thresh = 200; + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + + sf->auto_min_max_partition_size = 1; + sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; + sf->adjust_partitioning_from_last_frame = 1; + sf->last_partitioning_redo_frequency = 3; + + sf->use_uv_intra_rd_estimate = 1; + sf->skip_encode_sb = 1; + sf->use_lp32x32fdct = 1; + sf->subpel_iters_per_step = 1; + sf->use_fast_coef_updates = 2; + + sf->adaptive_rd_thresh = 4; + sf->mode_skip_start = 6; + } + if (speed == 5) { + sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->use_one_partition_size_always = 1; + sf->always_this_block_size = BLOCK_16X16; + sf->tx_size_search_method = frame_is_intra_only(cm) ? 
+ USE_FULL_RD : USE_LARGESTALL; + sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | + FLAG_SKIP_INTRA_BESTINTER | + FLAG_SKIP_COMP_BESTINTRA | + FLAG_SKIP_COMP_REFMISMATCH | + FLAG_SKIP_INTRA_LOWVAR | + FLAG_EARLY_TERMINATE; + sf->use_rd_breakout = 1; + sf->use_lp32x32fdct = 1; + sf->optimize_coefficients = 0; + sf->auto_mv_step_size = 1; + sf->reference_masking = 1; + + sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->search_method = HEX; + sf->subpel_iters_per_step = 1; + sf->disable_split_var_thresh = 64; + sf->disable_filter_search_var_thresh = 500; + for (i = 0; i < TX_SIZES; i++) { + sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + } + sf->use_fast_coef_updates = 2; + sf->adaptive_rd_thresh = 4; + sf->mode_skip_start = 6; + } +} static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { @@ -629,6 +800,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->adaptive_motion_search = 1; sf->adaptive_pred_filter_type = 2; sf->auto_mv_step_size = 1; + sf->reference_masking = 1; sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -699,7 +871,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { // best quality defaults sf->RD = 1; sf->search_method = NSTEP; - sf->auto_filter = 1; sf->recode_loop = 1; sf->subpel_search_method = SUBPEL_TREE; sf->subpel_iters_per_step = 2; @@ -744,179 +915,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { cpi->diamond_search_sad = vp9_full_range_search; break; case 1: - sf->adaptive_rd_thresh = 1; - sf->recode_loop = (speed < 1); - - if (speed == 1) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? 
- DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 1; - sf->auto_mv_step_size = 1; - sf->adaptive_rd_thresh = 2; - sf->recode_loop = 2; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) - ? USE_FULL_RD : USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = 1; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; - sf->recode_loop = 2; - sf->use_lp32x32fdct = 1; - sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - } - if (speed == 3) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - - if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = DISABLE_ALL_SPLIT; - else - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - - 
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_INTRA_LOWVAR; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = 1; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } - if (speed == 4) { - sf->use_square_partition_only = 1; - sf->tx_size_search_method = USE_LARGESTALL; - sf->disable_split_mask = DISABLE_ALL_SPLIT; - - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; - sf->auto_mv_step_size = 1; - - sf->disable_filter_search_var_thresh = 200; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - - sf->auto_min_max_partition_size = 1; - sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; - sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; - - sf->use_uv_intra_rd_estimate = 1; - sf->skip_encode_sb = 1; - sf->use_lp32x32fdct = 1; - sf->subpel_iters_per_step = 1; - sf->use_fast_coef_updates = 2; - - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - - /* sf->intra_y_mode_mask = INTRA_DC_ONLY; - sf->intra_uv_mode_mask = INTRA_DC_ONLY; - sf->search_method = BIGDIA; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 64; */ - } - if (speed == 
5) { - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->use_one_partition_size_always = 1; - sf->always_this_block_size = BLOCK_16X16; - sf->tx_size_search_method = frame_is_intra_only(cm) ? - USE_FULL_RD : USE_LARGESTALL; - sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | - FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | - FLAG_SKIP_COMP_REFMISMATCH | - FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - sf->use_rd_breakout = 1; - sf->use_lp32x32fdct = 1; - sf->optimize_coefficients = 0; - sf->auto_mv_step_size = 1; - // sf->reduce_first_step_size = 1; - // sf->reference_masking = 1; - - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->search_method = HEX; - sf->subpel_iters_per_step = 1; - sf->disable_split_var_thresh = 64; - sf->disable_filter_search_var_thresh = 500; - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; - } - sf->use_fast_coef_updates = 2; - sf->adaptive_rd_thresh = 4; - sf->mode_skip_start = 6; - } + set_good_speed_feature(cm, sf, speed); + break; break; case 2: set_rt_speed_feature(cm, sf, speed); @@ -1653,16 +1653,20 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->bytes = 0; if (cpi->b_calculate_psnr) { - cpi->total_sq_error = 0.0; - cpi->total_sq_error2 = 0.0; cpi->total_y = 0.0; cpi->total_u = 0.0; cpi->total_v = 0.0; cpi->total = 0.0; + cpi->total_sq_error = 0; + cpi->total_samples = 0; + cpi->totalp_y = 0.0; cpi->totalp_u = 0.0; cpi->totalp_v = 0.0; cpi->totalp = 0.0; + cpi->totalp_sq_error = 0; + cpi->totalp_samples = 0; + cpi->tot_recode_hits = 0; cpi->summed_quality = 0; cpi->summed_weights = 0; @@ -1897,21 +1901,20 @@ void vp9_remove_compressor(VP9_PTR *ptr) { / time_encoded; if (cpi->b_calculate_psnr) { - YV12_BUFFER_CONFIG *lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - double samples = 3.0 / 2 * cpi->count * - lst_yv12->y_width * lst_yv12->y_height; - double total_psnr = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error); 
- double total_psnr2 = vp9_mse2psnr(samples, 255.0, cpi->total_sq_error2); - double total_ssim = 100 * pow(cpi->summed_quality / - cpi->summed_weights, 8.0); - double total_ssimp = 100 * pow(cpi->summedp_quality / - cpi->summedp_weights, 8.0); + const double total_psnr = vp9_mse2psnr(cpi->total_samples, 255.0, + cpi->total_sq_error); + const double totalp_psnr = vp9_mse2psnr(cpi->totalp_samples, 255.0, + cpi->totalp_sq_error); + const double total_ssim = 100 * pow(cpi->summed_quality / + cpi->summed_weights, 8.0); + const double totalp_ssim = 100 * pow(cpi->summedp_quality / + cpi->summedp_weights, 8.0); fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" "VPXSSIM\tVPSSIMP\t Time(ms)\n"); fprintf(f, "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t%8.0f\n", dr, cpi->total / cpi->count, total_psnr, - cpi->totalp / cpi->count, total_psnr2, total_ssim, total_ssimp, + cpi->totalp / cpi->count, totalp_psnr, total_ssim, totalp_ssim, total_encode_time); } @@ -2055,8 +2058,8 @@ void vp9_remove_compressor(VP9_PTR *ptr) { } -static uint64_t calc_plane_error(uint8_t *orig, int orig_stride, - uint8_t *recon, int recon_stride, +static uint64_t calc_plane_error(const uint8_t *orig, int orig_stride, + const uint8_t *recon, int recon_stride, unsigned int cols, unsigned int rows) { unsigned int row, col; uint64_t total_sse = 0; @@ -2073,8 +2076,8 @@ static uint64_t calc_plane_error(uint8_t *orig, int orig_stride, /* Handle odd-sized width */ if (col < cols) { unsigned int border_row, border_col; - uint8_t *border_orig = orig; - uint8_t *border_recon = recon; + const uint8_t *border_orig = orig; + const uint8_t *border_recon = recon; for (border_row = 0; border_row < 16; border_row++) { for (border_col = col; border_col < cols; border_col++) { @@ -2105,51 +2108,57 @@ static uint64_t calc_plane_error(uint8_t *orig, int orig_stride, return total_sse; } +typedef struct { + double psnr[4]; // total/y/u/v + uint64_t sse[4]; // total/y/u/v + uint32_t samples[4]; // total/y/u/v +} 
PSNR_STATS; + +static void calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, + PSNR_STATS *psnr) { + const int widths[3] = {a->y_width, a->uv_width, a->uv_width }; + const int heights[3] = {a->y_height, a->uv_height, a->uv_height}; + const uint8_t *a_planes[3] = {a->y_buffer, a->u_buffer, a->v_buffer }; + const int a_strides[3] = {a->y_stride, a->uv_stride, a->uv_stride}; + const uint8_t *b_planes[3] = {b->y_buffer, b->u_buffer, b->v_buffer }; + const int b_strides[3] = {b->y_stride, b->uv_stride, b->uv_stride}; + int i; + uint64_t total_sse = 0; + uint32_t total_samples = 0; -static void generate_psnr_packet(VP9_COMP *cpi) { - YV12_BUFFER_CONFIG *orig = cpi->Source; - YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; - struct vpx_codec_cx_pkt pkt; - uint64_t sse; - int i; - unsigned int width = orig->y_crop_width; - unsigned int height = orig->y_crop_height; + for (i = 0; i < 3; ++i) { + const int w = widths[i]; + const int h = heights[i]; + const uint32_t samples = w * h; + const double sse = calc_plane_error(a_planes[i], a_strides[i], + b_planes[i], b_strides[i], + w, h); + psnr->sse[1 + i] = sse; + psnr->samples[1 + i] = samples; + psnr->psnr[1 + i] = vp9_mse2psnr(samples, 255.0, sse); - pkt.kind = VPX_CODEC_PSNR_PKT; - sse = calc_plane_error(orig->y_buffer, orig->y_stride, - recon->y_buffer, recon->y_stride, - width, height); - pkt.data.psnr.sse[0] = sse; - pkt.data.psnr.sse[1] = sse; - pkt.data.psnr.samples[0] = width * height; - pkt.data.psnr.samples[1] = width * height; - - width = orig->uv_crop_width; - height = orig->uv_crop_height; - - sse = calc_plane_error(orig->u_buffer, orig->uv_stride, - recon->u_buffer, recon->uv_stride, - width, height); - pkt.data.psnr.sse[0] += sse; - pkt.data.psnr.sse[2] = sse; - pkt.data.psnr.samples[0] += width * height; - pkt.data.psnr.samples[2] = width * height; - - sse = calc_plane_error(orig->v_buffer, orig->uv_stride, - recon->v_buffer, recon->uv_stride, - width, height); - pkt.data.psnr.sse[0] += 
sse; - pkt.data.psnr.sse[3] = sse; - pkt.data.psnr.samples[0] += width * height; - pkt.data.psnr.samples[3] = width * height; - - for (i = 0; i < 4; i++) - pkt.data.psnr.psnr[i] = vp9_mse2psnr(pkt.data.psnr.samples[i], 255.0, - (double)pkt.data.psnr.sse[i]); + total_sse += sse; + total_samples += samples; + } - vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); + psnr->sse[0] = total_sse; + psnr->samples[0] = total_samples; + psnr->psnr[0] = vp9_mse2psnr(total_samples, 255.0, total_sse); } +static void generate_psnr_packet(VP9_COMP *cpi) { + struct vpx_codec_cx_pkt pkt; + int i; + PSNR_STATS psnr; + calc_psnr(cpi->Source, cpi->common.frame_to_show, &psnr); + for (i = 0; i < 4; ++i) { + pkt.data.psnr.samples[i] = psnr.samples[i]; + pkt.data.psnr.sse[i] = psnr.sse[i]; + pkt.data.psnr.psnr[i] = psnr.psnr[i]; + } + pkt.kind = VPX_CODEC_PSNR_PKT; + vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); +} int vp9_use_as_reference(VP9_PTR ptr, int ref_frame_flags) { VP9_COMP *cpi = (VP9_COMP *)(ptr); @@ -3247,7 +3256,11 @@ static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { - vp9_get_one_pass_params(cpi); + if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + vp9_get_one_pass_cbr_params(cpi); + } else { + vp9_get_one_pass_params(cpi); + } encode_frame_to_data_rate(cpi, size, dest, frame_flags); } @@ -3404,7 +3417,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, #endif frames_to_arf = cpi->rc.frames_till_gf_update_due; - assert(frames_to_arf < cpi->rc.frames_to_key); + assert(frames_to_arf <= cpi->rc.frames_to_key); if ((cpi->source = vp9_lookahead_peek(cpi->lookahead, frames_to_arf))) { #if CONFIG_MULTIPLE_ARF @@ -3599,76 +3612,43 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cpi->count++; if (cpi->b_calculate_psnr) { - double ye, ue, ve; - double frame_psnr; - YV12_BUFFER_CONFIG *orig = cpi->Source; - 
YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; - YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; - int y_samples = orig->y_height * orig->y_width; - int uv_samples = orig->uv_height * orig->uv_width; - int t_samples = y_samples + 2 * uv_samples; - double sq_error; - - ye = (double)calc_plane_error(orig->y_buffer, orig->y_stride, - recon->y_buffer, recon->y_stride, - orig->y_crop_width, orig->y_crop_height); - - ue = (double)calc_plane_error(orig->u_buffer, orig->uv_stride, - recon->u_buffer, recon->uv_stride, - orig->uv_crop_width, orig->uv_crop_height); - - ve = (double)calc_plane_error(orig->v_buffer, orig->uv_stride, - recon->v_buffer, recon->uv_stride, - orig->uv_crop_width, orig->uv_crop_height); - - sq_error = ye + ue + ve; - - frame_psnr = vp9_mse2psnr(t_samples, 255.0, sq_error); - - cpi->total_y += vp9_mse2psnr(y_samples, 255.0, ye); - cpi->total_u += vp9_mse2psnr(uv_samples, 255.0, ue); - cpi->total_v += vp9_mse2psnr(uv_samples, 255.0, ve); - cpi->total_sq_error += sq_error; - cpi->total += frame_psnr; + YV12_BUFFER_CONFIG *orig = cpi->Source; + YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; + YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; + PSNR_STATS psnr; + calc_psnr(orig, recon, &psnr); + + cpi->total += psnr.psnr[0]; + cpi->total_y += psnr.psnr[1]; + cpi->total_u += psnr.psnr[2]; + cpi->total_v += psnr.psnr[3]; + cpi->total_sq_error += psnr.sse[0]; + cpi->total_samples += psnr.samples[0]; + { - double frame_psnr2, frame_ssim2 = 0; - double weight = 0; + PSNR_STATS psnr2; + double frame_ssim2 = 0, weight = 0; #if CONFIG_VP9_POSTPROC vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->lf.filter_level * 10 / 6); #endif vp9_clear_system_state(); - ye = (double)calc_plane_error(orig->y_buffer, orig->y_stride, - pp->y_buffer, pp->y_stride, - orig->y_crop_width, orig->y_crop_height); - - ue = (double)calc_plane_error(orig->u_buffer, orig->uv_stride, - pp->u_buffer, pp->uv_stride, - orig->uv_crop_width, orig->uv_crop_height); - - ve = 
(double)calc_plane_error(orig->v_buffer, orig->uv_stride, - pp->v_buffer, pp->uv_stride, - orig->uv_crop_width, orig->uv_crop_height); - - sq_error = ye + ue + ve; - - frame_psnr2 = vp9_mse2psnr(t_samples, 255.0, sq_error); + calc_psnr(orig, pp, &psnr2); - cpi->totalp_y += vp9_mse2psnr(y_samples, 255.0, ye); - cpi->totalp_u += vp9_mse2psnr(uv_samples, 255.0, ue); - cpi->totalp_v += vp9_mse2psnr(uv_samples, 255.0, ve); - cpi->total_sq_error2 += sq_error; - cpi->totalp += frame_psnr2; + cpi->totalp += psnr2.psnr[0]; + cpi->totalp_y += psnr2.psnr[1]; + cpi->totalp_u += psnr2.psnr[2]; + cpi->totalp_v += psnr2.psnr[3]; + cpi->totalp_sq_error += psnr2.sse[0]; + cpi->totalp_samples += psnr2.samples[0]; - frame_ssim2 = vp9_calc_ssim(cpi->Source, - recon, 1, &weight); + frame_ssim2 = vp9_calc_ssim(orig, recon, 1, &weight); cpi->summed_quality += frame_ssim2 * weight; cpi->summed_weights += weight; - frame_ssim2 = vp9_calc_ssim(cpi->Source, - &cm->post_proc_buffer, 1, &weight); + frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, 1, &weight); cpi->summedp_quality += frame_ssim2 * weight; cpi->summedp_weights += weight; @@ -3686,8 +3666,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->b_calculate_ssimg) { double y, u, v, frame_all; - frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, - &y, &u, &v); + frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v); cpi->total_ssimg_y += y; cpi->total_ssimg_u += u; cpi->total_ssimg_v += v; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 6dde3bea7..a5be0f424 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -232,57 +232,185 @@ typedef enum { } LAST_FRAME_PARTITION_METHOD; typedef struct { + // This flag refers to whether or not to perform rd optimization. int RD; + + // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). 
SEARCH_METHODS search_method; - int auto_filter; + + // Recode_loop can be: + // 0 means we only encode a frame once + // 1 means we can re-encode based on bitrate constraints on any frame + // 2 means we can only recode gold, alt, and key frames. int recode_loop; + + // Subpel_search_method can only be subpel_tree which does a subpixel + // logarithmic search that keeps stepping at 1/2 pixel units until + // you stop getting a gain, and then goes on to 1/4 and repeats + // the same process. Along the way it skips many diagonals. SUBPEL_SEARCH_METHODS subpel_search_method; + + // Maximum number of steps in logarithmic subpel search before giving up. int subpel_iters_per_step; + + // Thresh_mult is used to set a threshold for the rd score. A higher value + // means that we will accept the best mode so far more often. This number + // is used in combination with the current block size, and thresh_freq_fact + // to pick a threshold. int thresh_mult[MAX_MODES]; int thresh_mult_sub8x8[MAX_REFS]; + + // This parameter controls the number of steps we'll do in a diamond + // search. int max_step_search_steps; + + // This parameter controls which step in the n-step process we start at. + // It's changed adaptively based on circumstances. int reduce_first_step_size; + + // If this is set to 1, we limit the motion search range to 2 times the + // largest motion vector found in the last frame. int auto_mv_step_size; + + // Trellis (dynamic programming) optimization of quantized values (+1, 0). int optimize_coefficients; + + // Always set to 0. If on it enables 0 cost background transmission + // (except for the initial transmission of the segmentation). The feature is + // disabled because the addition of very large block sizes makes the + // backgrounds very cheap to encode, and the segmentation we have + // adds overhead.
int static_segmentation; + + // If 1 we iterate finding a best reference for 2 ref frames together - via + // a log search that iterates 4 times (check around mv for last for best + // error of combined predictor then check around mv for alt). If 0 we + // just use the best motion vector found for each frame by itself. int comp_inter_joint_search_thresh; + + // This variable is used to cap the maximum number of times we skip testing a + // mode to be evaluated. A high value means we will be faster. int adaptive_rd_thresh; + + // Enables skipping the reconstruction step (idct, recon) in the + // intermediate steps assuming the last frame didn't have too many intra + // blocks and the q is less than a threshold. int skip_encode_sb; int skip_encode_frame; + + // This variable allows us to reuse the last frame's partition choices + // (64x64 v 32x32 etc) for this frame. It can be set to only use the last + // frame as a starting point in low motion scenes or always use it. If set + // we use last partitioning_redo frequency to determine how often to redo + // the partitioning from scratch. Adjust_partitioning_from_last_frame + // enables us to adjust up or down one partitioning from the last frame's + // partitioning. LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; + + // Determine which method we use to determine transform size. We can choose + // between options like full rd, largest for prediction size, largest + // for intra and model coefs for the rest. TX_SIZE_SEARCH_METHOD tx_size_search_method; + + // Low precision 32x32 fdct keeps everything in 16 bits and thus is less + // precise but significantly faster than the non lp version. int use_lp32x32fdct; + + // TODO(JBB): remove this as it's no longer used. + + // If set partition size will always be always_this_block_size. int use_one_partition_size_always; + + // Skip rectangular partition test when partition type none gives better + // rd than partition type split.
int less_rectangular_check; + + // Disable testing non square partitions. (eg 16x32) int use_square_partition_only; + + // After looking at the first set of modes (set by index here), skip + // checking modes for reference frames that don't match the reference frame + // of the best so far. int mode_skip_start; + + // TODO(JBB): Remove this. int reference_masking; + + // Used in conjunction with use_one_partition_size_always. BLOCK_SIZE always_this_block_size; + + // Sets min and max partition sizes for this 64x64 region based on the + // same superblock in last encoded frame, and the left and above neighbor + // in this block. int auto_min_max_partition_size; + + // Min and max partition size we enable (block_size) as per auto + // min max, but also used by adjust partitioning, and pick_partitioning. BLOCK_SIZE min_partition_size; BLOCK_SIZE max_partition_size; + + // Whether or not we allow partitions one smaller or one greater than the last + // frame's partitioning. Only used if use_lastframe_partitioning is set. int adjust_partitioning_from_last_frame; + + // How frequently we re do the partitioning from scratch. Only used if + // use_lastframe_partitioning is set. int last_partitioning_redo_frequency; + + // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable + // it always, to allow it for only Last frame and Intra, disable it for all + // inter modes or to enable it always. int disable_split_mask; + + // TODO(jbb): Remove this and everything that uses it. It's only valid if + // we were doing small to large partition checks. We currently do the + // reverse. int using_small_partition_info; + // TODO(jingning): combine the related motion search speed features + // This allows us to use motion search at other sizes as a starting + // point for this motion search and limits the search range around it. int adaptive_motion_search; + + // Allows sub 8x8 modes to use the prediction filter that was determined + // best for 8x8 mode. 
If set to 0 we always re check all the filters for + // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter + // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. int adaptive_pred_filter_type; // Implements various heuristics to skip searching modes // The heuristics selected are based on flags // defined in the MODE_SEARCH_SKIP_HEURISTICS enum unsigned int mode_search_skip_flags; + // A source variance threshold below which the split mode is disabled unsigned int disable_split_var_thresh; + // A source variance threshold below which filter search is disabled // Choose a very large value (UINT_MAX) to use 8-tap always unsigned int disable_filter_search_var_thresh; + + // These bit masks allow you to enable or disable intra modes for each + // transform size separately. int intra_y_mode_mask[TX_SIZES]; int intra_uv_mode_mask[TX_SIZES]; + + // This variable enables an early break out of mode testing if the model for + // rd built from the prediction signal indicates a value that's much + // higher than the best rd we've seen so far. int use_rd_breakout; + + // This enables us to use an estimate for intra rd based on dc mode rather + // than choosing an actual uv mode in the stage of encoding before the actual + // final encode. int use_uv_intra_rd_estimate; + + // This picks a loop filter strength by trying a small portion of the image + // with different values. int use_fast_lpf_pick; + + // This feature limits the number of coefficients updates we actually do + // by only looking at counts from 1/2 the bands. 
int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced } SPEED_FEATURES; @@ -572,12 +700,16 @@ typedef struct VP9_COMP { double total_u; double total_v; double total; - double total_sq_error; + uint64_t total_sq_error; + uint64_t total_samples; + double totalp_y; double totalp_u; double totalp_v; double totalp; - double total_sq_error2; + uint64_t totalp_sq_error; + uint64_t totalp_samples; + int bytes; double summed_quality; double summed_weights; @@ -601,7 +733,7 @@ typedef struct VP9_COMP { int *mb_norm_activity_map; int output_partition; - /* force next frame to intra when kf_auto says so */ + // Force next frame to intra when kf_auto says so. int force_next_frame_intra; int droppable; @@ -643,7 +775,7 @@ typedef struct VP9_COMP { int64_t mode_test_hits[BLOCK_SIZES]; #endif - /* Y,U,V,(A) */ + // Y,U,V,(A) ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 728f238e4..aefef5319 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -866,36 +866,35 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; // Update rate control heuristics - cpi->rc.projected_frame_size = (bytes_used << 3); + rc->projected_frame_size = (bytes_used << 3); // Post encode loop adjustment of Q prediction. - vp9_rc_update_rate_correction_factors( - cpi, (cpi->sf.recode_loop || + vp9_rc_update_rate_correction_factors(cpi, (cpi->sf.recode_loop || cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); // Keep a record of last Q and ambient average Q. 
if (cm->frame_type == KEY_FRAME) { - cpi->rc.last_q[KEY_FRAME] = cm->base_qindex; - cpi->rc.avg_frame_qindex[KEY_FRAME] = - (2 + 3 * cpi->rc.avg_frame_qindex[KEY_FRAME] + cm->base_qindex) >> 2; - } else if (!cpi->rc.is_src_frame_alt_ref && + rc->last_q[KEY_FRAME] = cm->base_qindex; + rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO( + 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); + } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - cpi->rc.last_q[2] = cm->base_qindex; - cpi->rc.avg_frame_qindex[2] = - (2 + 3 * cpi->rc.avg_frame_qindex[2] + cm->base_qindex) >> 2; + rc->last_q[2] = cm->base_qindex; + rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( + 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); } else { - cpi->rc.last_q[INTER_FRAME] = cm->base_qindex; - cpi->rc.avg_frame_qindex[INTER_FRAME] = - (2 + 3 * cpi->rc.avg_frame_qindex[INTER_FRAME] + - cm->base_qindex) >> 2; - cpi->rc.ni_frames++; - cpi->rc.tot_q += vp9_convert_qindex_to_q(cm->base_qindex); - cpi->rc.avg_q = cpi->rc.tot_q / (double)cpi->rc.ni_frames; + rc->last_q[INTER_FRAME] = cm->base_qindex; + rc->avg_frame_qindex[INTER_FRAME] = ROUND_POWER_OF_TWO( + 3 * rc->avg_frame_qindex[INTER_FRAME] + cm->base_qindex, 2); + rc->ni_frames++; + rc->tot_q += vp9_convert_qindex_to_q(cm->base_qindex); + rc->avg_q = rc->tot_q / (double)rc->ni_frames; // Calculate the average Q for normal inter frames (not key or GFU frames). - cpi->rc.ni_tot_qi += cm->base_qindex; - cpi->rc.ni_av_qi = cpi->rc.ni_tot_qi / cpi->rc.ni_frames; + rc->ni_tot_qi += cm->base_qindex; + rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames; } // Keep record of last boosted (KF/GF/ARF) Q value. @@ -903,38 +902,34 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // If all mbs in this group are skipped only update if the Q value is // better than that already stored.
// This is used to help set quality in forced key frames to reduce popping - if ((cm->base_qindex < cpi->rc.last_boosted_qindex) || + if ((cm->base_qindex < rc->last_boosted_qindex) || ((cpi->static_mb_pct < 100) && ((cm->frame_type == KEY_FRAME) || cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)))) { - cpi->rc.last_boosted_qindex = cm->base_qindex; + (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) { + rc->last_boosted_qindex = cm->base_qindex; } - vp9_update_buffer_level(cpi, cpi->rc.projected_frame_size); + vp9_update_buffer_level(cpi, rc->projected_frame_size); // Rolling monitors of whether we are over or underspending used to help // regulate min and Max Q in two pass. if (cm->frame_type != KEY_FRAME) { - cpi->rc.rolling_target_bits = - ((cpi->rc.rolling_target_bits * 3) + - cpi->rc.this_frame_target + 2) / 4; - cpi->rc.rolling_actual_bits = - ((cpi->rc.rolling_actual_bits * 3) + - cpi->rc.projected_frame_size + 2) / 4; - cpi->rc.long_rolling_target_bits = - ((cpi->rc.long_rolling_target_bits * 31) + - cpi->rc.this_frame_target + 16) / 32; - cpi->rc.long_rolling_actual_bits = - ((cpi->rc.long_rolling_actual_bits * 31) + - cpi->rc.projected_frame_size + 16) / 32; + rc->rolling_target_bits = ROUND_POWER_OF_TWO( + rc->rolling_target_bits * 3 + rc->this_frame_target, 2); + rc->rolling_actual_bits = ROUND_POWER_OF_TWO( + rc->rolling_actual_bits * 3 + rc->projected_frame_size, 2); + rc->long_rolling_target_bits = ROUND_POWER_OF_TWO( + rc->long_rolling_target_bits * 31 + rc->this_frame_target, 5); + rc->long_rolling_actual_bits = ROUND_POWER_OF_TWO( + rc->long_rolling_actual_bits * 31 + rc->projected_frame_size, 5); } // Actual bits spent - cpi->rc.total_actual_bits += cpi->rc.projected_frame_size; + rc->total_actual_bits += rc->projected_frame_size; // Debug stats - cpi->rc.total_target_vs_actual += (cpi->rc.this_frame_target - - cpi->rc.projected_frame_size); + rc->total_target_vs_actual += 
(rc->this_frame_target - + rc->projected_frame_size); #ifndef DISABLE_RC_LONG_TERM_MEM // Update bits left to the kf and gf groups to account for overshoot or @@ -952,8 +947,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } #endif - if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame - && (cm->frame_type != KEY_FRAME)) + if (cpi->oxcf.play_alternate && cpi->refresh_alt_ref_frame && + (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. update_alt_ref_frame_stats(cpi); else @@ -961,14 +956,14 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { update_golden_frame_stats(cpi); if (cm->frame_type == KEY_FRAME) - cpi->rc.frames_since_key = 0; + rc->frames_since_key = 0; if (cm->show_frame) { - cpi->rc.frames_since_key++; - cpi->rc.frames_to_key--; + rc->frames_since_key++; + rc->frames_to_key--; } } void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { cpi->rc.frames_since_key++; - // cpi->rc.frames_to_key--; + cpi->rc.frames_to_key--; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 5ca34795d..b46e80891 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1418,9 +1418,10 @@ static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode, } } -void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { - x->e_mbd.mi_8x8[0]->mbmi.mode = mb; - x->e_mbd.mi_8x8[0]->mbmi.mv[0].as_int = mv->as_int; +void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode, + const MV *mv) { + xd->mi_8x8[0]->mbmi.mode = mode; + xd->mi_8x8[0]->mbmi.mv[0].as_mv = *mv; } static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, @@ -1638,6 +1639,10 @@ static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, x->e_mbd.plane[0].pre[1] = orig_pre[1]; } +static INLINE int mv_has_subpel(const MV *mv) { + return (mv->row & 0x0F) || (mv->col & 0x0F); +} + static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, 
const TileInfo *const tile, BEST_SEG_INFO *bsi_buf, int filter_idx, @@ -1931,15 +1936,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (filter_idx > 0) { BEST_SEG_INFO *ref_bsi = bsi_buf; - subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) || - (mode_mv[this_mode].as_mv.col & 0x0f); + subpelmv = mv_has_subpel(&mode_mv[this_mode].as_mv); have_ref = mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; + ref_bsi->rdstat[i][mode_idx].mvs[0].as_int; if (has_second_rf) { - subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) || - (second_mode_mv[this_mode].as_mv.col & 0x0f); - have_ref &= second_mode_mv[this_mode].as_int == - ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; + subpelmv |= mv_has_subpel(&second_mode_mv[this_mode].as_mv); + have_ref &= second_mode_mv[this_mode].as_int == + ref_bsi->rdstat[i][mode_idx].mvs[1].as_int; } if (filter_idx > 1 && !subpelmv && !have_ref) { @@ -2276,14 +2279,14 @@ static void setup_pred_block(const MACROBLOCKD *xd, } } -static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, - const TileInfo *const tile, - int idx, MV_REFERENCE_FRAME frame_type, - BLOCK_SIZE block_size, - int mi_row, int mi_col, - int_mv frame_nearest_mv[MAX_REF_FRAMES], - int_mv frame_near_mv[MAX_REF_FRAMES], - struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { +void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, + const TileInfo *const tile, + int idx, MV_REFERENCE_FRAME frame_type, + BLOCK_SIZE block_size, + int mi_row, int mi_col, + int_mv frame_nearest_mv[MAX_REF_FRAMES], + int_mv frame_near_mv[MAX_REF_FRAMES], + struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { VP9_COMMON *cm = &cpi->common; YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]]; MACROBLOCKD *const xd = &x->e_mbd; @@ -2770,12 +2773,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, pred_exists = 0; // Are all MVs integer pel for Y and UV - intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 && - (mbmi->mv[0].as_mv.col & 15) == 
0; + intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv); if (is_comp_pred) - intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 && - (mbmi->mv[1].as_mv.col & 15) == 0; - + intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv); // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used @@ -3175,17 +3175,29 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = INT_MAX; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { - setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame), - ref_frame, block_size, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); + vp9_setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame), + ref_frame, block_size, mi_row, mi_col, + frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } + cpi->ref_frame_mask = 0; + for (ref_frame = LAST_FRAME; + ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) { + int i; + for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { + if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) { + cpi->ref_frame_mask |= (1 << ref_frame); + break; + } + } + } + for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3235,8 +3247,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } // Skip if the current reference frame has been masked off - if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && - (cpi->ref_frame_mask & (1 << ref_frame))) + if (cpi->ref_frame_mask & (1 << ref_frame) && this_mode != NEWMV) continue; // Test best rd so far against threshold for trying this mode. 
@@ -3641,11 +3652,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - // If we are using reference masking and the set mask flag is set then - // create the reference frame mask. - if (cpi->sf.reference_masking && cpi->set_ref_frame_mask) - cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame[0]); - // Flag all modes that have a distortion thats > 2x the best we found at // this level. for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) { @@ -3797,15 +3803,27 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { - setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame), - ref_frame, block_size, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - yv12_mb); + vp9_setup_buffer_inter(cpi, x, tile, get_ref_frame_idx(cpi, ref_frame), + ref_frame, block_size, mi_row, mi_col, + frame_mv[NEARESTMV], frame_mv[NEARMV], + yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } + cpi->ref_frame_mask = 0; + for (ref_frame = LAST_FRAME; + ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) { + int i; + for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { + if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) { + cpi->ref_frame_mask |= (1 << ref_frame); + break; + } + } + } + for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3853,11 +3871,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, continue; } - // Skip if the current reference frame has been masked off - if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask && - (cpi->ref_frame_mask & (1 << ref_frame))) - continue; - // Test best rd so far against threshold for trying this mode. 
if ((best_rd < ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] * @@ -4367,11 +4380,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } - // If we are using reference masking and the set mask flag is set then - // create the reference frame mask. - if (cpi->sf.reference_masking && cpi->set_ref_frame_mask) - cpi->ref_frame_mask = ~(1 << vp9_ref_order[best_mode_index].ref_frame[0]); - if (best_rd == INT64_MAX && bsize < BLOCK_8X8) { *returnrate = INT_MAX; *returndistortion = INT_MAX; diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index f0e8849c1..4b244a50a 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -27,6 +27,15 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi); void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); +void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, + const TileInfo *const tile, + int idx, MV_REFERENCE_FRAME frame_type, + BLOCK_SIZE block_size, + int mi_row, int mi_col, + int_mv frame_nearest_mv[MAX_REF_FRAMES], + int_mv frame_near_mv[MAX_REF_FRAMES], + struct buf_2d yv12_mb[4][MAX_MB_PLANE]); + void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *r, int64_t *d, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd); @@ -51,8 +60,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, void vp9_init_me_luts(); -void vp9_set_mbmode_and_mvs(MACROBLOCK *x, - MB_PREDICTION_MODE mb, int_mv *mv); +void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode, + const MV *mv); void vp9_get_entropy_contexts(TX_SIZE tx_size, ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16], |