diff options
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 43 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 198 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 253 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 49 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 171 |
6 files changed, 350 insertions, 365 deletions
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 930e2f82b..50d803680 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -76,6 +76,19 @@ static int select_cq_level(int qindex) { return ret_val; } +static int gfboost_qadjust(int qindex) { + const double q = vp9_convert_qindex_to_q(qindex); + return (int)((0.00000828 * q * q * q) + + (-0.0055 * q * q) + + (1.32 * q) + 79.3); +} + +static int kfboost_qadjust(int qindex) { + const double q = vp9_convert_qindex_to_q(qindex); + return (int)((0.00000973 * q * q * q) + + (-0.00613 * q * q) + + (1.316 * q) + 121.2); +} // Resets the first pass file to the given position using a relative seek from // the current position. @@ -928,11 +941,11 @@ static int64_t estimate_modemvcost(VP9_COMP *cpi, intra_cost = bitcost(av_intra); // Estimate of extra bits per mv overhead for mbs - // << 9 is the normalization to the (bits * 512) used in vp9_bits_per_mb + // << 9 is the normalization to the (bits * 512) used in vp9_rc_bits_per_mb mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9; // Crude estimate of overhead cost from modes - // << 9 is the normalization to (bits * 512) used in vp9_bits_per_mb + // << 9 is the normalization to (bits * 512) used in vp9_rc_bits_per_mb mode_cost = (int)((((av_pct_inter - av_pct_motion) * zz_cost) + (av_pct_motion * motion_cost) + @@ -1052,8 +1065,8 @@ static int estimate_max_q(VP9_COMP *cpi, sr_correction * speed_correction * cpi->twopass.est_max_qcorrection_factor; - bits_per_mb_at_this_q = vp9_bits_per_mb(INTER_FRAME, q, - err_correction_factor); + bits_per_mb_at_this_q = vp9_rc_bits_per_mb(INTER_FRAME, q, + err_correction_factor); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; @@ -1140,7 +1153,7 @@ static int estimate_cq(VP9_COMP *cpi, sr_correction * speed_correction * clip_iifactor; bits_per_mb_at_this_q = - vp9_bits_per_mb(INTER_FRAME, q, err_correction_factor); + vp9_rc_bits_per_mb(INTER_FRAME, q, err_correction_factor); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; @@ -1936,7 +1949,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int q = cpi->rc.last_q[INTER_FRAME]; int gf_bits; - int boost = (cpi->rc.gfu_boost * vp9_gfboost_qadjust(q)) / 100; + int boost = (cpi->rc.gfu_boost * gfboost_qadjust(q)) / 100; // Set max and minimum boost and hence minimum allocation boost = clamp(boost, 125, (cpi->rc.baseline_gf_interval + 1) * 200); @@ -2728,3 +2741,21 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // sizes. cpi->twopass.modified_error_left -= kf_group_err; } + +void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { +#ifdef DISABLE_RC_LONG_TERM_MEM + cpi->twopass.bits_left -= cpi->rc.this_frame_target; +#else + cpi->twopass.bits_left -= 8 * bytes_used; +#endif + if (!cpi->refresh_alt_ref_frame) { + double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.framerate; + double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * + cpi->oxcf.two_pass_vbrmin_section + / 100); + if (two_pass_min_rate < lower_bounds_min_rate) + two_pass_min_rate = lower_bounds_min_rate; + cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / + cpi->oxcf.framerate); + } +} diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 7ba5bc47d..b0314475f 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -161,7 +161,7 @@ void vp9_initialize_enc() { vp9_tokenize_initialize(); vp9_init_quant_tables(); vp9_init_me_luts(); - vp9_init_minq_luts(); + vp9_rc_init_minq_luts(); // init_base_skip_probs(); vp9_entropy_mv_init(); vp9_entropy_mode_init(); @@ -268,8 +268,8 @@ int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, vp9_clear_system_state(); // Look up the current projected bits per block for the base index - base_bits_per_mb = vp9_bits_per_mb(cpi->common.frame_type, - base_q_index, 1.0); + base_bits_per_mb = vp9_rc_bits_per_mb(cpi->common.frame_type, + base_q_index, 1.0); // Find the target bits per mb based on the base value and given ratio. target_bits_per_mb = rate_target_ratio * base_bits_per_mb; @@ -277,8 +277,8 @@ int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, // Convert the q target to an index for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; i++) { target_index = i; - if (vp9_bits_per_mb(cpi->common.frame_type, - i, 1.0) <= target_bits_per_mb ) + if (vp9_rc_bits_per_mb(cpi->common.frame_type, + i, 1.0) <= target_bits_per_mb ) break; } @@ -2686,7 +2686,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { static void encode_with_recode_loop(VP9_COMP *cpi, unsigned long *size, uint8_t *dest, - int q, + int *q, int bottom_index, int top_index, int frame_over_shoot_limit, @@ -2700,7 +2700,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, do { vp9_clear_system_state(); // __asm emms; - vp9_set_quantizer(cpi, q); + vp9_set_quantizer(cpi, *q); if (loop_count == 0) { // Set up entropy context depending on frame type. The decoder mandates @@ -2753,7 +2753,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, } else { // Special case handling for forced key frames if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { - int last_q = q; + int last_q = *q; int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); int high_err_target = cpi->ambient_err; @@ -2769,32 +2769,32 @@ static void encode_with_recode_loop(VP9_COMP *cpi, (kf_err > low_err_target && cpi->rc.projected_frame_size <= frame_under_shoot_limit)) { // Lower q_high - q_high = q > q_low ? q - 1 : q_low; + q_high = *q > q_low ? *q - 1 : q_low; // Adjust Q - q = (q * high_err_target) / kf_err; - q = MIN(q, (q_high + q_low) >> 1); + *q = ((*q) * high_err_target) / kf_err; + *q = MIN((*q), (q_high + q_low) >> 1); } else if (kf_err < low_err_target && cpi->rc.projected_frame_size >= frame_under_shoot_limit) { // The key frame is much better than the previous frame // Raise q_low - q_low = q < q_high ? q + 1 : q_high; + q_low = *q < q_high ? *q + 1 : q_high; // Adjust Q - q = (q * low_err_target) / kf_err; - q = MIN(q, (q_high + q_low + 1) >> 1); + *q = ((*q) * low_err_target) / kf_err; + *q = MIN((*q), (q_high + q_low + 1) >> 1); } // Clamp Q to upper and lower limits: - q = clamp(q, q_low, q_high); + *q = clamp(*q, q_low, q_high); - loop = q != last_q; + loop = *q != last_q; } else if (recode_loop_test( cpi, frame_over_shoot_limit, frame_under_shoot_limit, - q, top_index, bottom_index)) { + *q, top_index, bottom_index)) { // Is the projected frame size out of range and are we allowed // to attempt to recode. - int last_q = q; + int last_q = *q; int retries = 0; // Frame size out of permitted range: @@ -2803,22 +2803,22 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // Frame is too large if (cpi->rc.projected_frame_size > cpi->rc.this_frame_target) { // Raise Qlow as to at least the current value - q_low = q < q_high ? q + 1 : q_high; + q_low = *q < q_high ? *q + 1 : q_high; if (undershoot_seen || loop_count > 1) { // Update rate_correction_factor unless - vp9_update_rate_correction_factors(cpi, 1); + vp9_rc_update_rate_correction_factors(cpi, 1); - q = (q_high + q_low + 1) / 2; + *q = (q_high + q_low + 1) / 2; } else { // Update rate_correction_factor unless - vp9_update_rate_correction_factors(cpi, 0); + vp9_rc_update_rate_correction_factors(cpi, 0); - q = vp9_regulate_q(cpi, cpi->rc.this_frame_target); + *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target); - while (q < q_low && retries < 10) { - vp9_update_rate_correction_factors(cpi, 0); - q = vp9_regulate_q(cpi, cpi->rc.this_frame_target); + while (*q < q_low && retries < 10) { + vp9_rc_update_rate_correction_factors(cpi, 0); + *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target); retries++; } } @@ -2826,32 +2826,33 @@ static void encode_with_recode_loop(VP9_COMP *cpi, overshoot_seen = 1; } else { // Frame is too small - q_high = q > q_low ? q - 1 : q_low; + q_high = *q > q_low ? *q - 1 : q_low; if (overshoot_seen || loop_count > 1) { // Update rate_correction_factor unless // cpi->rc.active_worst_quality has changed. - vp9_update_rate_correction_factors(cpi, 1); + vp9_rc_update_rate_correction_factors(cpi, 1); - q = (q_high + q_low) / 2; + *q = (q_high + q_low) / 2; } else { // Update rate_correction_factor unless // cpi->rc.active_worst_quality has changed. - vp9_update_rate_correction_factors(cpi, 0); + vp9_rc_update_rate_correction_factors(cpi, 0); - q = vp9_regulate_q(cpi, cpi->rc.this_frame_target); + *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target); // Special case reset for qlow for constrained quality. // This should only trigger where there is very substantial // undershoot on a frame and the auto cq level is above // the user passsed in value. - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && q < q_low) { - q_low = q; + if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && + *q < q_low) { + q_low = *q; } - while (q > q_high && retries < 10) { - vp9_update_rate_correction_factors(cpi, 0); - q = vp9_regulate_q(cpi, cpi->rc.this_frame_target); + while (*q > q_high && retries < 10) { + vp9_rc_update_rate_correction_factors(cpi, 0); + *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target); retries++; } } @@ -2860,9 +2861,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, } // Clamp Q to upper and lower limits: - q = clamp(q, q_low, q_high); + *q = clamp(*q, q_low, q_high); - loop = q != last_q; + loop = *q != last_q; } else { loop = 0; } @@ -3003,15 +3004,16 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, configure_static_seg_features(cpi); } - // Decide how big to make the frame. - vp9_pick_frame_size(cpi); - vp9_clear_system_state(); - q = vp9_pick_q_and_adjust_q_bounds(cpi, &bottom_index, &top_index); + // Decide how big to make the frame. + vp9_rc_pick_frame_size_and_bounds(cpi, + &frame_under_shoot_limit, + &frame_over_shoot_limit); - vp9_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, - &frame_over_shoot_limit); + q = vp9_rc_pick_q_and_adjust_q_bounds(cpi, + &bottom_index, + &top_index); #if CONFIG_MULTIPLE_ARF // Force the quantizer determined by the coding order pattern. @@ -3075,7 +3077,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, encode_with_recode_loop(cpi, size, dest, - q, + &q, bottom_index, top_index, frame_over_shoot_limit, @@ -3157,106 +3159,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, * needed in motion search besides loopfilter */ cm->last_frame_type = cm->frame_type; - // Update rate control heuristics - cpi->rc.projected_frame_size = (*size) << 3; - - // Post encode loop adjustment of Q prediction. - vp9_update_rate_correction_factors( - cpi, (cpi->sf.recode_loop || - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); - - - cpi->rc.last_q[cm->frame_type] = cm->base_qindex; - - // Keep record of last boosted (KF/KF/ARF) Q value. - // If the current frame is coded at a lower Q then we also update it. - // If all mbs in this group are skipped only update if the Q value is - // better than that already stored. - // This is used to help set quality in forced key frames to reduce popping - if ((cm->base_qindex < cpi->rc.last_boosted_qindex) || - ((cpi->static_mb_pct < 100) && - ((cm->frame_type == KEY_FRAME) || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) { - cpi->rc.last_boosted_qindex = cm->base_qindex; - } - - if (cm->frame_type == KEY_FRAME) { - vp9_adjust_key_frame_context(cpi); - } - - // Keep a record of ambient average Q. - if (cm->frame_type != KEY_FRAME) - cpi->rc.avg_frame_qindex = (2 + 3 * cpi->rc.avg_frame_qindex + - cm->base_qindex) >> 2; - - // Keep a record from which we can calculate the average Q excluding GF - // updates and key frames. - if (cm->frame_type != KEY_FRAME && - !cpi->refresh_golden_frame && - !cpi->refresh_alt_ref_frame) { - cpi->rc.ni_frames++; - cpi->rc.tot_q += vp9_convert_qindex_to_q(q); - cpi->rc.avg_q = cpi->rc.tot_q / (double)cpi->rc.ni_frames; - - // Calculate the average Q for normal inter frames (not key or GFU frames). - cpi->rc.ni_tot_qi += q; - cpi->rc.ni_av_qi = cpi->rc.ni_tot_qi / cpi->rc.ni_frames; - } - - // Update the buffer level variable. - // Non-viewable frames are a special case and are treated as pure overhead. - if (!cm->show_frame) - cpi->rc.bits_off_target -= cpi->rc.projected_frame_size; - else - cpi->rc.bits_off_target += cpi->rc.av_per_frame_bandwidth - - cpi->rc.projected_frame_size; - - // Clip the buffer level at the maximum buffer size - if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size) - cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size; - - // Rolling monitors of whether we are over or underspending used to help - // regulate min and Max Q in two pass. - if (cm->frame_type != KEY_FRAME) { - cpi->rc.rolling_target_bits = - ((cpi->rc.rolling_target_bits * 3) + - cpi->rc.this_frame_target + 2) / 4; - cpi->rc.rolling_actual_bits = - ((cpi->rc.rolling_actual_bits * 3) + - cpi->rc.projected_frame_size + 2) / 4; - cpi->rc.long_rolling_target_bits = - ((cpi->rc.long_rolling_target_bits * 31) + - cpi->rc.this_frame_target + 16) / 32; - cpi->rc.long_rolling_actual_bits = - ((cpi->rc.long_rolling_actual_bits * 31) + - cpi->rc.projected_frame_size + 16) / 32; - } - - // Actual bits spent - cpi->rc.total_actual_bits += cpi->rc.projected_frame_size; - - // Debug stats - cpi->rc.total_target_vs_actual += (cpi->rc.this_frame_target - - cpi->rc.projected_frame_size); - - cpi->rc.buffer_level = cpi->rc.bits_off_target; - -#ifndef DISABLE_RC_LONG_TERM_MEM - // Update bits left to the kf and gf groups to account for overshoot or - // undershoot on these frames - if (cm->frame_type == KEY_FRAME) { - cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - - cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); - } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) { - cpi->twopass.gf_group_bits += cpi->rc.this_frame_target - - cpi->rc.projected_frame_size; - - cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0); - } -#endif + vp9_rc_postencode_update(cpi, *size, q); #if 0 output_frame_level_debug_stats(cpi); @@ -3386,6 +3289,10 @@ static void Pass2Encode(VP9_COMP *cpi, unsigned long *size, encode_frame_to_data_rate(cpi, size, dest, frame_flags); // vp9_print_modes_and_motion_vectors(&cpi->common, "encode.stt"); + + vp9_twopass_postencode_update(cpi, *size); + + /* #ifdef DISABLE_RC_LONG_TERM_MEM cpi->twopass.bits_left -= cpi->rc.this_frame_target; #else @@ -3404,6 +3311,7 @@ static void Pass2Encode(VP9_COMP *cpi, unsigned long *size, cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->oxcf.framerate); } + */ } static void check_initial_width(VP9_COMP *cpi, YV12_BUFFER_CONFIG *sd) { diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 03002ef4b..b8602e094 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -30,7 +30,6 @@ #include "vp9/encoder/vp9_lookahead.h" #define DISABLE_RC_LONG_TERM_MEM 0 - // #define MODE_TEST_HIT_STATS // #define SPEEDSTATS 1 diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 42372e56c..bf1fc4f31 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -26,6 +26,8 @@ #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_seg_common.h" +#define LIMIT_QRANGE_FOR_ALTREF_AND_KEY 1 + #define MIN_BPB_FACTOR 0.005 #define MAX_BPB_FACTOR 50 @@ -67,7 +69,7 @@ static int calculate_minq_index(double maxq, return QINDEX_RANGE - 1; } -void vp9_init_minq_luts(void) { +void vp9_rc_init_minq_luts(void) { int i; for (i = 0; i < QINDEX_RANGE; i++) { @@ -121,22 +123,8 @@ double vp9_convert_qindex_to_q(int qindex) { return vp9_ac_quant(qindex, 0) / 4.0; } -int vp9_gfboost_qadjust(int qindex) { - const double q = vp9_convert_qindex_to_q(qindex); - return (int)((0.00000828 * q * q * q) + - (-0.0055 * q * q) + - (1.32 * q) + 79.3); -} - -static int kfboost_qadjust(int qindex) { - const double q = vp9_convert_qindex_to_q(qindex); - return (int)((0.00000973 * q * q * q) + - (-0.00613 * q * q) + - (1.316 * q) + 121.2); -} - -int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex, - double correction_factor) { +int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, + double correction_factor) { const double q = vp9_convert_qindex_to_q(qindex); int enumerator = frame_type == KEY_FRAME ? 3300000 : 2250000; @@ -213,7 +201,7 @@ void vp9_setup_inter_frame(VP9_COMP *cpi) { static int estimate_bits_at_q(int frame_kind, int q, int mbs, double correction_factor) { - const int bpm = (int)(vp9_bits_per_mb(frame_kind, q, correction_factor)); + const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor)); // Attempt to retain reasonable accuracy without overflow. The cutoff is // chosen such that the maximum product of Bpm and MBs fits 31 bits. The @@ -240,15 +228,9 @@ static void calc_iframe_target_size(VP9_COMP *cpi) { if (target > max_rate) target = max_rate; } - cpi->rc.this_frame_target = target; - - // Target rate per SB64 (including partial SB64s. - cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) / - (cpi->common.width * cpi->common.height); } - // Do the best we can to define the parameters for the next GF based // on what information we have available. // @@ -273,11 +255,6 @@ static void calc_pframe_target_size(VP9_COMP *cpi) { cpi->rc.this_frame_target = cpi->rc.per_frame_bandwidth; } - // Target rate per SB64 (including partial SB64s. - cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) / - (cpi->common.width * cpi->common.height); - - // Check that the total sum of adjustments is not above the maximum allowed. // That is, having allowed for the KF and GF penalties, we have not pushed // the current inter-frame target too low. If the adjustment we apply here is @@ -309,7 +286,7 @@ static void calc_pframe_target_size(VP9_COMP *cpi) { } -void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { +void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { const int q = cpi->common.base_qindex; int correction_factor = 100; double rate_correction_factor; @@ -390,7 +367,7 @@ void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { } -int vp9_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame) { +int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame) { int q = cpi->rc.active_worst_quality; int i; @@ -422,8 +399,8 @@ int vp9_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame) { i = cpi->rc.active_best_quality; do { - bits_per_mb_at_this_q = (int)vp9_bits_per_mb(cpi->common.frame_type, i, - correction_factor); + bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cpi->common.frame_type, i, + correction_factor); if (bits_per_mb_at_this_q <= target_bits_per_mb) { if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error) @@ -461,8 +438,9 @@ static int get_active_quality(int q, return active_best_quality; } -int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, - int * bottom_index, int * top_index) { +int vp9_rc_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, + int *bottom_index, + int *top_index) { // Set an active best quality and if necessary active worst quality int q = cpi->rc.active_worst_quality; VP9_COMMON *const cm = &cpi->common; @@ -481,7 +459,12 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, (last_boosted_q * 0.75)); cpi->rc.active_best_quality = MAX(qindex + delta_qindex, - cpi->rc.best_quality); + cpi->rc.best_quality); + } else if (cpi->pass == 0 && cpi->common.current_video_frame == 0) { + // If this is the first (key) frame in 1-pass, active best/worst is + // the user best/worst-allowed, and leave the top_index to active_worst. + cpi->rc.active_best_quality = cpi->oxcf.best_allowed_q; + cpi->rc.active_worst_quality = cpi->oxcf.worst_allowed_q; } else { int high = 5000; int low = 400; @@ -490,9 +473,9 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, // Baseline value derived from cpi->active_worst_quality and kf boost cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.kf_boost, - low, high, - kf_low_motion_minq, - kf_high_motion_minq); + low, high, + kf_low_motion_minq, + kf_high_motion_minq); // Allow somewhat lower kf minq with small image formats. if ((cm->width * cm->height) <= (352 * 288)) { @@ -533,14 +516,14 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, q = cpi->cq_target_quality; if (cpi->frames_since_key > 1) { cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost, - low, high, - afq_low_motion_minq, - afq_high_motion_minq); + low, high, + afq_low_motion_minq, + afq_high_motion_minq); } else { cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost, - low, high, - gf_low_motion_minq, - gf_high_motion_minq); + low, high, + gf_low_motion_minq, + gf_high_motion_minq); } // Constrained quality use slightly lower active best. cpi->rc.active_best_quality = cpi->rc.active_best_quality * 15 / 16; @@ -550,22 +533,19 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, cpi->rc.active_best_quality = cpi->cq_target_quality; } else { if (cpi->frames_since_key > 1) { - cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost, - low, high, - afq_low_motion_minq, - afq_high_motion_minq); + cpi->rc.active_best_quality = get_active_quality( + q, cpi->rc.gfu_boost, low, high, + afq_low_motion_minq, afq_high_motion_minq); } else { - cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost, - low, high, - gf_low_motion_minq, - gf_high_motion_minq); + cpi->rc.active_best_quality = get_active_quality( + q, cpi->rc.gfu_boost, low, high, + gf_low_motion_minq, gf_high_motion_minq); } } } else { - cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost, - low, high, - gf_low_motion_minq, - gf_high_motion_minq); + cpi->rc.active_best_quality = get_active_quality( + q, cpi->rc.gfu_boost, low, high, + gf_low_motion_minq, gf_high_motion_minq); } } else { if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { @@ -605,25 +585,23 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, if (cpi->rc.active_worst_quality < cpi->rc.active_best_quality) cpi->rc.active_worst_quality = cpi->rc.active_best_quality; + *top_index = cpi->rc.active_worst_quality; + *bottom_index = cpi->rc.active_best_quality; + +#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY // Limit Q range for the adaptive loop. if (cm->frame_type == KEY_FRAME && !cpi->this_key_frame_forced) { - *top_index = - (cpi->rc.active_worst_quality + cpi->rc.active_best_quality * 3) / 4; - // If this is the first (key) frame in 1-pass, active best is the user - // best-allowed, and leave the top_index to active_worst. - if (cpi->pass == 0 && cpi->common.current_video_frame == 0) { - cpi->rc.active_best_quality = cpi->oxcf.best_allowed_q; - *top_index = cpi->oxcf.worst_allowed_q; + if (!(cpi->pass == 0 && cpi->common.current_video_frame == 0)) { + *top_index = + (cpi->rc.active_worst_quality + cpi->rc.active_best_quality * 3) / 4; } } else if (!cpi->is_src_frame_alt_ref && (cpi->oxcf.end_usage != USAGE_STREAM_FROM_SERVER) && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { *top_index = (cpi->rc.active_worst_quality + cpi->rc.active_best_quality) / 2; - } else { - *top_index = cpi->rc.active_worst_quality; } - *bottom_index = cpi->rc.active_best_quality; +#endif if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { q = cpi->rc.active_best_quality; @@ -636,14 +614,13 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, // 1-pass: for now, use per-frame-bw for target size of frame, scaled // by |x| for key frame. int scale = (cm->frame_type == KEY_FRAME) ? 5 : 1; - q = vp9_regulate_q(cpi, scale * cpi->rc.av_per_frame_bandwidth); + q = vp9_rc_regulate_q(cpi, scale * cpi->rc.av_per_frame_bandwidth); } else { - q = vp9_regulate_q(cpi, cpi->rc.this_frame_target); + q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target); } if (q > *top_index) q = *top_index; } - return q; } @@ -695,7 +672,7 @@ static int estimate_keyframe_frequency(VP9_COMP *cpi) { } -void vp9_adjust_key_frame_context(VP9_COMP *cpi) { +static void adjust_key_frame_context(VP9_COMP *cpi) { // Clear down mmx registers to allow floating point in what follows vp9_clear_system_state(); @@ -704,28 +681,30 @@ void vp9_adjust_key_frame_context(VP9_COMP *cpi) { } -void vp9_compute_frame_size_bounds(VP9_COMP *cpi, int *frame_under_shoot_limit, - int *frame_over_shoot_limit) { +static void compute_frame_size_bounds(const VP9_COMP *cpi, + int this_frame_target, + int *frame_under_shoot_limit, + int *frame_over_shoot_limit) { // Set-up bounds on acceptable frame size: if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { *frame_under_shoot_limit = 0; *frame_over_shoot_limit = INT_MAX; } else { if (cpi->common.frame_type == KEY_FRAME) { - *frame_over_shoot_limit = cpi->rc.this_frame_target * 9 / 8; - *frame_under_shoot_limit = cpi->rc.this_frame_target * 7 / 8; + *frame_over_shoot_limit = this_frame_target * 9 / 8; + *frame_under_shoot_limit = this_frame_target * 7 / 8; } else { if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) { - *frame_over_shoot_limit = cpi->rc.this_frame_target * 9 / 8; - *frame_under_shoot_limit = cpi->rc.this_frame_target * 7 / 8; + *frame_over_shoot_limit = this_frame_target * 9 / 8; + *frame_under_shoot_limit = this_frame_target * 7 / 8; } else { // Stron overshoot limit for constrained quality if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { - *frame_over_shoot_limit = cpi->rc.this_frame_target * 11 / 8; - *frame_under_shoot_limit = cpi->rc.this_frame_target * 2 / 8; + *frame_over_shoot_limit = this_frame_target * 11 / 8; + *frame_under_shoot_limit = this_frame_target * 2 / 8; } else { - *frame_over_shoot_limit = cpi->rc.this_frame_target * 11 / 8; - *frame_under_shoot_limit = cpi->rc.this_frame_target * 5 / 8; + *frame_over_shoot_limit = this_frame_target * 11 / 8; + *frame_under_shoot_limit = this_frame_target * 5 / 8; } } } @@ -740,9 +719,10 @@ void vp9_compute_frame_size_bounds(VP9_COMP *cpi, int *frame_under_shoot_limit, } } - // return of 0 means drop frame -int vp9_pick_frame_size(VP9_COMP *cpi) { +int vp9_rc_pick_frame_size_and_bounds(VP9_COMP *cpi, + int *frame_under_shoot_limit, + int *frame_over_shoot_limit) { VP9_COMMON *cm = &cpi->common; if (cm->frame_type == KEY_FRAME) @@ -750,5 +730,112 @@ int vp9_pick_frame_size(VP9_COMP *cpi) { else calc_pframe_target_size(cpi); + // Target rate per SB64 (including partial SB64s. + cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) / + (cpi->common.width * cpi->common.height); + compute_frame_size_bounds(cpi, cpi->rc.this_frame_target, + frame_under_shoot_limit, frame_over_shoot_limit); + return 1; } + +void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used, int q) { + VP9_COMMON *const cm = &cpi->common; + // Update rate control heuristics + cpi->rc.projected_frame_size = (bytes_used << 3); + + // Post encode loop adjustment of Q prediction. + vp9_rc_update_rate_correction_factors( + cpi, (cpi->sf.recode_loop || + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); + + cpi->rc.last_q[cm->frame_type] = cm->base_qindex; + + // Keep record of last boosted (KF/KF/ARF) Q value. + // If the current frame is coded at a lower Q then we also update it. + // If all mbs in this group are skipped only update if the Q value is + // better than that already stored. + // This is used to help set quality in forced key frames to reduce popping + if ((cm->base_qindex < cpi->rc.last_boosted_qindex) || + ((cpi->static_mb_pct < 100) && + ((cm->frame_type == KEY_FRAME) || cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) { + cpi->rc.last_boosted_qindex = cm->base_qindex; + } + + if (cm->frame_type == KEY_FRAME) { + adjust_key_frame_context(cpi); + } + + // Keep a record of ambient average Q. + if (cm->frame_type != KEY_FRAME) + cpi->rc.avg_frame_qindex = (2 + 3 * cpi->rc.avg_frame_qindex + + cm->base_qindex) >> 2; + + // Keep a record from which we can calculate the average Q excluding GF + // updates and key frames. + if (cm->frame_type != KEY_FRAME && + !cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) { + cpi->rc.ni_frames++; + cpi->rc.tot_q += vp9_convert_qindex_to_q(q); + cpi->rc.avg_q = cpi->rc.tot_q / (double)cpi->rc.ni_frames; + + // Calculate the average Q for normal inter frames (not key or GFU frames). + cpi->rc.ni_tot_qi += q; + cpi->rc.ni_av_qi = cpi->rc.ni_tot_qi / cpi->rc.ni_frames; + } + + // Update the buffer level variable. + // Non-viewable frames are a special case and are treated as pure overhead. + if (!cm->show_frame) + cpi->rc.bits_off_target -= cpi->rc.projected_frame_size; + else + cpi->rc.bits_off_target += cpi->rc.av_per_frame_bandwidth - + cpi->rc.projected_frame_size; + + // Clip the buffer level at the maximum buffer size + if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size) + cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size; + + // Rolling monitors of whether we are over or underspending used to help + // regulate min and Max Q in two pass. + if (cm->frame_type != KEY_FRAME) { + cpi->rc.rolling_target_bits = + ((cpi->rc.rolling_target_bits * 3) + + cpi->rc.this_frame_target + 2) / 4; + cpi->rc.rolling_actual_bits = + ((cpi->rc.rolling_actual_bits * 3) + + cpi->rc.projected_frame_size + 2) / 4; + cpi->rc.long_rolling_target_bits = + ((cpi->rc.long_rolling_target_bits * 31) + + cpi->rc.this_frame_target + 16) / 32; + cpi->rc.long_rolling_actual_bits = + ((cpi->rc.long_rolling_actual_bits * 31) + + cpi->rc.projected_frame_size + 16) / 32; + } + + // Actual bits spent + cpi->rc.total_actual_bits += cpi->rc.projected_frame_size; + + // Debug stats + cpi->rc.total_target_vs_actual += (cpi->rc.this_frame_target - + cpi->rc.projected_frame_size); + + cpi->rc.buffer_level = cpi->rc.bits_off_target; + +#ifndef DISABLE_RC_LONG_TERM_MEM + // Update bits left to the kf and gf groups to account for overshoot or + // undershoot on these frames + if (cm->frame_type == KEY_FRAME) { + cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - + cpi->rc.projected_frame_size; + + cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0); + } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) { + cpi->twopass.gf_group_bits += cpi->rc.this_frame_target - + cpi->rc.projected_frame_size; + + cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0); + } +#endif +} diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 13357447a..f01d18672 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -20,24 +20,41 @@ void vp9_save_coding_context(VP9_COMP *cpi); void vp9_restore_coding_context(VP9_COMP *cpi); void vp9_setup_key_frame(VP9_COMP *cpi); -void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var); -int vp9_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame); -void vp9_adjust_key_frame_context(VP9_COMP *cpi); -void vp9_compute_frame_size_bounds(VP9_COMP *cpi, - int *frame_under_shoot_limit, - int *frame_over_shoot_limit); +void vp9_setup_inter_frame(VP9_COMP *cpi); -void vp9_init_minq_luts(void); +double vp9_convert_qindex_to_q(int qindex); -// return of 0 means drop frame -int vp9_pick_frame_size(VP9_COMP *cpi); +// Updates rate correction factors +void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var); -double vp9_convert_qindex_to_q(int qindex); -int vp9_gfboost_qadjust(int qindex); -int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex, - double correction_factor); -void vp9_setup_inter_frame(VP9_COMP *cpi); -int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, - int * bottom_index, int * top_index); +// initialize luts for minq +void vp9_rc_init_minq_luts(void); + +// return of 0 means drop frame +// Changes rc.this_frame_target and rc.sb64_rate_target +int vp9_rc_pick_frame_size_and_bounds(VP9_COMP *cpi, + int *frame_under_shoot_limit, + int *frame_over_shoot_limit); +// Picks q and q bounds given the target for bits +int vp9_rc_pick_q_and_adjust_q_bounds(VP9_COMP *cpi, + int * bottom_index, + int * top_index); + +// Estimates q to achieve a target bits per frame +int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame); + +// Post encode update of the rate control parameters based +// on bytes used and q used for the frame +void vp9_rc_postencode_update(VP9_COMP *cpi, + uint64_t bytes_used, + int q_used); + +// estimates bits per mb for a given qindex and correction factor +int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, + double correction_factor); + +// Post encode update of the rate control parameters for 2-pass +void vp9_twopass_postencode_update(VP9_COMP *cpi, + uint64_t bytes_used); #endif // VP9_ENCODER_VP9_RATECTRL_H_ diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index ee9c18b21..84b71224e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -762,32 +762,6 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, cpi->tx_stepdown_count[0]++; } -static TX_SIZE select_tx_size(TX_MODE tx_mode, TX_SIZE max_tx_size, - int64_t rd[][2]) { - if (max_tx_size == TX_32X32 && - (tx_mode == ALLOW_32X32 || - (tx_mode == TX_MODE_SELECT && - rd[TX_32X32][1] < rd[TX_16X16][1] && - rd[TX_32X32][1] < rd[TX_8X8][1] && - rd[TX_32X32][1] < rd[TX_4X4][1]))) { - return TX_32X32; - } else if (max_tx_size >= TX_16X16 && - (tx_mode == ALLOW_16X16 || - tx_mode == ALLOW_32X32 || - (tx_mode == TX_MODE_SELECT && - rd[TX_16X16][1] < rd[TX_8X8][1] && - rd[TX_16X16][1] < rd[TX_4X4][1]))) { - return TX_16X16; - } else if (tx_mode == ALLOW_8X8 || - tx_mode == ALLOW_16X16 || - tx_mode == ALLOW_32X32 || - (tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { - return TX_8X8; - } else { - return TX_4X4; - } -} - static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int (*r)[2], int *rate, int64_t *d, int64_t *distortion, @@ -802,39 +776,42 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int64_t rd[TX_SIZES][2]; int n, m; int s0, s1; + const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; + int64_t best_rd = INT64_MAX; + TX_SIZE best_tx = TX_4X4; const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs); - - for (n = TX_4X4; n <= max_tx_size; n++) { - r[n][1] = r[n][0]; - if (r[n][0] == INT_MAX) - continue; - for (m = 0; m <= n - (n == max_tx_size); m++) { - if (m == n) - r[n][1] += vp9_cost_zero(tx_probs[m]); - else - r[n][1] += vp9_cost_one(tx_probs[m]); - } - } - assert(skip_prob > 0); s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); for (n = TX_4X4; n <= max_tx_size; n++) { + r[n][1] = r[n][0]; + if (r[n][0] < INT_MAX) { + for (m = 0; m <= n - (n == max_tx_size); m++) { + if (m == n) + r[n][1] += vp9_cost_zero(tx_probs[m]); + else + r[n][1] += vp9_cost_one(tx_probs[m]); + } + } if (d[n] == INT64_MAX) { rd[n][0] = rd[n][1] = INT64_MAX; - continue; - } - if (s[n]) { + } else if (s[n]) { rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); } else { rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } + + if (rd[n][1] < best_rd) { + best_tx = n; + best_rd = rd[n][1]; + } } + mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ? + best_tx : MIN(max_tx_size, max_mode_tx_size); - mbmi->tx_size = select_tx_size(cm->tx_mode, max_tx_size, rd); *distortion = d[mbmi->tx_size]; *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT]; @@ -844,29 +821,18 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, tx_cache[ALLOW_8X8] = rd[TX_8X8][0]; tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0]; tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0]; - if (max_tx_size == TX_32X32 && - rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && - rd[TX_32X32][1] < rd[TX_4X4][1]) - tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; - else if (max_tx_size >= TX_16X16 && - rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) - tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; - else - tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? - rd[TX_4X4][1] : rd[TX_8X8][1]; - if (max_tx_size == TX_32X32 && - rd[TX_32X32][1] < rd[TX_16X16][1] && - rd[TX_32X32][1] < rd[TX_8X8][1] && - rd[TX_32X32][1] < rd[TX_4X4][1]) { + if (max_tx_size == TX_32X32 && best_tx == TX_32X32) { + tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; cpi->tx_stepdown_count[0]++; - } else if (max_tx_size >= TX_16X16 && - rd[TX_16X16][1] < rd[TX_8X8][1] && - rd[TX_16X16][1] < rd[TX_4X4][1]) { + } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) { + tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; cpi->tx_stepdown_count[max_tx_size - TX_16X16]++; } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) { + tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1]; cpi->tx_stepdown_count[max_tx_size - TX_8X8]++; } else { + tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1]; cpi->tx_stepdown_count[max_tx_size - TX_4X4]++; } } @@ -886,14 +852,17 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, int n, m; int s0, s1; double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00}; - // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00}; + const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; + int64_t best_rd = INT64_MAX; + TX_SIZE best_tx = TX_4X4; const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs); - - // for (n = TX_4X4; n <= max_txfm_size; n++) - // r[n][0] = (r[n][0] * scale_r[n]); + assert(skip_prob > 0); + s0 = vp9_cost_bit(skip_prob, 0); + s1 = vp9_cost_bit(skip_prob, 1); for (n = TX_4X4; n <= max_tx_size; n++) { + double scale = scale_rd[n]; r[n][1] = r[n][0]; for (m = 0; m <= n - (n == max_tx_size); m++) { if (m == n) @@ -901,40 +870,29 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, else r[n][1] += vp9_cost_one(tx_probs[m]); } - } - - assert(skip_prob > 0); - s0 = vp9_cost_bit(skip_prob, 0); - s1 = vp9_cost_bit(skip_prob, 1); - - for (n = TX_4X4; n <= max_tx_size; n++) { if (s[n]) { - rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); + rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale; } else { - rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); - rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); + rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale; + rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale; + } + if (rd[n][1] < best_rd) { + best_rd = rd[n][1]; + best_tx = n; } - } - for (n = TX_4X4; n <= max_tx_size; n++) { - rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]); - rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]); } - mbmi->tx_size = select_tx_size(cm->tx_mode, max_tx_size, rd); + mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ? + best_tx : MIN(max_tx_size, max_mode_tx_size); // Actually encode using the chosen mode if a model was used, but do not // update the r, d costs txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip, &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size); - if (max_tx_size == TX_32X32 && - rd[TX_32X32][1] <= rd[TX_16X16][1] && - rd[TX_32X32][1] <= rd[TX_8X8][1] && - rd[TX_32X32][1] <= rd[TX_4X4][1]) { + if (max_tx_size == TX_32X32 && best_tx == TX_32X32) { cpi->tx_stepdown_count[0]++; - } else if (max_tx_size >= TX_16X16 && - rd[TX_16X16][1] <= rd[TX_8X8][1] && - rd[TX_16X16][1] <= rd[TX_4X4][1]) { + } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) { cpi->tx_stepdown_count[max_tx_size - TX_16X16]++; } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) { cpi->tx_stepdown_count[max_tx_size - TX_8X8]++; @@ -954,6 +912,9 @@ static void super_block_yrd(VP9_COMP *cpi, MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack; const int b_inter_mode = is_inter_block(mbmi); + const TX_SIZE max_tx_size = max_txsize_lookup[bs]; + TX_SIZE tx_size; + assert(bs == mbmi->sb_type); if (b_inter_mode) @@ -972,34 +933,16 @@ static void super_block_yrd(VP9_COMP *cpi, if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER && b_inter_mode) { - if (bs >= BLOCK_32X32) - model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd, - &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); - if (bs >= BLOCK_16X16) - model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd, - &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]); - - model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd, - &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]); - - model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd, - &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]); - + for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) + model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd, + &r[tx_size][0], &d[tx_size], &s[tx_size]); choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s, skip, sse, ref_best_rd, bs); } else { - if (bs >= BLOCK_32X32) - txfm_rd_in_plane(x, rdcost_stack, &r[TX_32X32][0], &d[TX_32X32], - &s[TX_32X32], &sse[TX_32X32], - ref_best_rd, 0, bs, TX_32X32); - if (bs >= BLOCK_16X16) - txfm_rd_in_plane(x, rdcost_stack, &r[TX_16X16][0], &d[TX_16X16], - &s[TX_16X16], &sse[TX_16X16], - ref_best_rd, 0, bs, TX_16X16); - txfm_rd_in_plane(x, rdcost_stack, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], - &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8); - txfm_rd_in_plane(x, rdcost_stack, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], - &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4); + for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) + txfm_rd_in_plane(x, rdcost_stack, &r[tx_size][0], &d[tx_size], + &s[tx_size], &sse[tx_size], + ref_best_rd, 0, bs, tx_size); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, bs); } @@ -2150,7 +2093,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, int best_index = 0; int best_sad = INT_MAX; int this_sad = INT_MAX; - unsigned int max_mv = 0; + int max_mv = 0; uint8_t *src_y_ptr = x->plane[0].src.buf; uint8_t *ref_y_ptr; @@ -3651,7 +3594,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best compound/single-only prediction */ if (!disable_skip && ref_frame != INTRA_FRAME) { - int single_rd, hybrid_rd, single_rate, hybrid_rate; + int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) { single_rate = rate2 - compmode_cost; @@ -4402,7 +4345,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best compound/single-only prediction */ if (!disable_skip && ref_frame != INTRA_FRAME) { - int single_rd, hybrid_rd, single_rate, hybrid_rate; + int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; if (cpi->common.comp_pred_mode == REFERENCE_MODE_SELECT) { single_rate = rate2 - compmode_cost; |