diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 150 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 98 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 170 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 38 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_quantize_ssse3.asm | 5 |
8 files changed, 200 insertions, 276 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f5d43d659..442170f0c 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -319,7 +319,7 @@ static void build_activity_map(VP9_COMP *cpi) { } // Macroblock activity masking -void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { +static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { #if USE_ACT_INDEX x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2); x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); @@ -673,7 +673,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, } if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); + activity_masking(cpi, x); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); // __asm emms; @@ -2188,108 +2188,6 @@ static void switch_tx_mode(VP9_COMP *cpi) { cpi->common.tx_mode = ALLOW_32X32; } -static void encode_frame_internal(VP9_COMP *cpi) { - int mi_row; - MACROBLOCK *const x = &cpi->mb; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - -// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", -// cpi->common.current_video_frame, cpi->common.show_frame, -// cm->frame_type); - - vp9_zero(cm->counts.switchable_interp); - vp9_zero(cpi->tx_stepdown_count); - - xd->mi_8x8 = cm->mi_grid_visible; - // required for vp9_frame_init_quantizer - xd->mi_8x8[0] = cm->mi; - - xd->last_mi = cm->prev_mi; - - vp9_zero(cm->counts.mv); - vp9_zero(cpi->coef_counts); - vp9_zero(cm->counts.eob_branch); - - cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 - && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); - - vp9_frame_init_quantizer(cpi); - - vp9_initialize_rd_consts(cpi); - vp9_initialize_me_consts(cpi, cm->base_qindex); - switch_tx_mode(cpi); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Initialize encode frame context. - init_encode_frame_mb_context(cpi); - - // Build a frame level activity map - build_activity_map(cpi); - } - - // Re-initialize encode frame context. - init_encode_frame_mb_context(cpi); - - vp9_zero(cpi->rd_comp_pred_diff); - vp9_zero(cpi->rd_filter_diff); - vp9_zero(cpi->rd_tx_select_diff); - vp9_zero(cpi->rd_tx_select_threshes); - - set_prev_mi(cm); - - { - struct vpx_usec_timer emr_timer; - vpx_usec_timer_start(&emr_timer); - - { - // Take tiles into account and give start/end MB - int tile_col, tile_row; - TOKENEXTRA *tp = cpi->tok; - const int tile_cols = 1 << cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - for (tile_col = 0; tile_col < tile_cols; tile_col++) { - TileInfo tile; - TOKENEXTRA *tp_old = tp; - - // For each row of SBs in the frame - vp9_tile_init(&tile, cm, tile_row, tile_col); - for (mi_row = tile.mi_row_start; - mi_row < tile.mi_row_end; mi_row += 8) - encode_sb_row(cpi, &tile, mi_row, &tp); - - cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); - assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); - } - } - } - - vpx_usec_timer_mark(&emr_timer); - cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); - } - - if (cpi->sf.skip_encode_sb) { - int j; - unsigned int intra_count = 0, inter_count = 0; - for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { - intra_count += cm->counts.intra_inter[j][0]; - inter_count += cm->counts.intra_inter[j][1]; - } - cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count); - cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME); - cpi->sf.skip_encode_frame &= cm->show_frame; - } else { - cpi->sf.skip_encode_frame = 0; - } - -#if 0 - // Keep record of the total distortion this time around for future use - cpi->last_frame_distortion = cpi->frame_distortion; -#endif -} static int check_dual_ref_flags(VP9_COMP *cpi) { const int ref_flags = cpi->ref_frame_flags; @@ -2579,28 +2477,18 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile, &dummy_rate, &dummy_dist, 1); } } +// end RTC play code - -static void encode_rtc_frame_internal(VP9_COMP *cpi) { +static void encode_frame_internal(VP9_COMP *cpi) { int mi_row; - MACROBLOCK * const x = &cpi->mb; - VP9_COMMON * const cm = &cpi->common; - MACROBLOCKD * const xd = &x->e_mbd; + MACROBLOCK *const x = &cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; // fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", // cpi->common.current_video_frame, cpi->common.show_frame, // cm->frame_type); -// debug output -#if DBG_PRNT_SEGMAP - { - FILE *statsfile; - statsfile = fopen("segmap2.stt", "a"); - fprintf(statsfile, "\n"); - fclose(statsfile); - } -#endif - vp9_zero(cm->counts.switchable_interp); vp9_zero(cpi->tx_stepdown_count); @@ -2610,7 +2498,7 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) { xd->last_mi = cm->prev_mi; - vp9_zero(cpi->common.counts.mv); + vp9_zero(cm->counts.mv); vp9_zero(cpi->coef_counts); vp9_zero(cm->counts.eob_branch); @@ -2623,7 +2511,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) { vp9_initialize_rd_consts(cpi); vp9_initialize_me_consts(cpi, cm->base_qindex); switch_tx_mode(cpi); - cpi->sf.always_this_block_size = BLOCK_16X16; if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Initialize encode frame context. @@ -2662,9 +2549,12 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) { // For each row of SBs in the frame vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; - mi_row < tile.mi_row_end; mi_row += 8) - encode_rtc_sb_row(cpi, &tile, mi_row, &tp); - + mi_row < tile.mi_row_end; mi_row += 8) { + if (cpi->sf.super_fast_rtc) + encode_rtc_sb_row(cpi, &tile, mi_row, &tp); + else + encode_sb_row(cpi, &tile, mi_row, &tp); + } cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); } @@ -2694,8 +2584,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) { cpi->last_frame_distortion = cpi->frame_distortion; #endif } -// end RTC play code - void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -2770,10 +2658,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { select_tx_mode(cpi); cm->reference_mode = reference_mode; - if (cpi->sf.super_fast_rtc) - encode_rtc_frame_internal(cpi); - else - encode_frame_internal(cpi); + encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs); @@ -2853,10 +2738,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { } else { // Force the usage of the BILINEAR interp_filter. cm->interp_filter = BILINEAR; - if (cpi->sf.super_fast_rtc) - encode_rtc_frame_internal(cpi); - else - encode_frame_internal(cpi); + encode_frame_internal(cpi); } } diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 838f74e8c..dc35044d6 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -902,8 +902,8 @@ static double calc_correction_factor(double err_per_mb, return fclamp(pow(error_term, power_term), 0.05, 5.0); } -static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, - int section_target_bandwitdh) { +int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, + int section_target_bandwitdh) { int q; const int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; @@ -2280,8 +2280,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); - const int tmp_q = estimate_max_q(cpi, &twopass->total_left_stats, - section_target_bandwidth); + const int tmp_q = vp9_twopass_worst_quality(cpi, &twopass->total_left_stats, + section_target_bandwidth); rc->active_worst_quality = tmp_q; rc->ni_av_qi = tmp_q; diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 054ecf811..7e612183e 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -88,6 +88,8 @@ void vp9_end_first_pass(struct VP9_COMP *cpi); void vp9_init_second_pass(struct VP9_COMP *cpi); void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi); void vp9_end_second_pass(struct VP9_COMP *cpi); +int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, + int section_target_bandwitdh); // Post encode update of the rate control parameters for 2-pass void vp9_twopass_postencode_update(struct VP9_COMP *cpi, diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 917d3a4a2..a81aa0a71 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -846,6 +846,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, } if (speed >= 6) { sf->super_fast_rtc = 1; + sf->always_this_block_size = BLOCK_16X16; } } @@ -1155,14 +1156,17 @@ static void init_layer_context(VP9_COMP *const cpi) { LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; RATE_CONTROL *const lrc = &lc->rc; lrc->active_worst_quality = q_trans[oxcf->worst_allowed_q]; - lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; - lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; + lrc->avg_frame_qindex[INTER_FRAME] = lrc->active_worst_quality; + lrc->last_q[INTER_FRAME] = lrc->active_worst_quality; lrc->ni_av_qi = lrc->active_worst_quality; lrc->total_actual_bits = 0; lrc->total_target_vs_actual = 0; lrc->ni_tot_qi = 0; lrc->tot_q = 0.0; + lrc->avg_q = 0.0; lrc->ni_frames = 0; + lrc->decimation_count = 0; + lrc->decimation_factor = 0; lrc->rate_correction_factor = 1.0; lrc->key_frame_rate_correction_factor = 1.0; lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * @@ -1207,13 +1211,24 @@ static void update_layer_context_change_config(VP9_COMP *const cpi, // for the current layer. static void update_layer_framerate(VP9_COMP *const cpi) { int temporal_layer = cpi->svc.temporal_layer_id; + const VP9_CONFIG *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; RATE_CONTROL *const lrc = &lc->rc; - lc->framerate = cpi->oxcf.framerate / - cpi->oxcf.ts_rate_decimator[temporal_layer]; - lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / - lc->framerate); + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; + // Update the average layer frame size (non-cumulative per-frame-bw). + if (temporal_layer == 0) { + lc->avg_frame_size = lrc->av_per_frame_bandwidth; + } else { + double prev_layer_framerate = oxcf->framerate / + oxcf->ts_rate_decimator[temporal_layer - 1]; + int prev_layer_target_bandwidth = + oxcf->ts_target_bitrate[temporal_layer - 1] * 1000; + lc->avg_frame_size = + (int)(lc->target_bandwidth - prev_layer_target_bandwidth) / + (lc->framerate - prev_layer_framerate); + } } // Prior to encoding the frame, set the layer context, for the current layer @@ -1729,11 +1744,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { sizeof(*cpi->mbgraph_stats[i].mb_stats), 1)); } -#ifdef ENTROPY_STATS - if (cpi->pass != 1) - init_context_counters(); -#endif - /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; cpi->key_frame_frequency = cpi->oxcf.key_freq; @@ -2767,10 +2777,10 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest, - int *q) { + int q) { VP9_COMMON *const cm = &cpi->common; vp9_clear_system_state(); // __asm emms; - vp9_set_quantizer(cpi, *q); + vp9_set_quantizer(cpi, q); // Set up entropy context depending on frame type. The decoder mandates // the use of the default context, index 0, for keyframes and inter @@ -2804,7 +2814,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest, - int *q, + int q, int bottom_index, int top_index) { VP9_COMMON *const cm = &cpi->common; @@ -2824,7 +2834,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, do { vp9_clear_system_state(); // __asm emms; - vp9_set_quantizer(cpi, *q); + vp9_set_quantizer(cpi, q); if (loop_count == 0) { // Set up entropy context depending on frame type. The decoder mandates @@ -2881,7 +2891,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if ((cm->frame_type == KEY_FRAME) && cpi->rc.this_key_frame_forced && (cpi->rc.projected_frame_size < cpi->rc.max_frame_bandwidth)) { - int last_q = *q; + int last_q = q; int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); int high_err_target = cpi->ambient_err; @@ -2897,32 +2907,32 @@ static void encode_with_recode_loop(VP9_COMP *cpi, (kf_err > low_err_target && cpi->rc.projected_frame_size <= frame_under_shoot_limit)) { // Lower q_high - q_high = *q > q_low ? *q - 1 : q_low; + q_high = q > q_low ? q - 1 : q_low; // Adjust Q - *q = ((*q) * high_err_target) / kf_err; - *q = MIN((*q), (q_high + q_low) >> 1); + q = (q * high_err_target) / kf_err; + q = MIN(q, (q_high + q_low) >> 1); } else if (kf_err < low_err_target && cpi->rc.projected_frame_size >= frame_under_shoot_limit) { // The key frame is much better than the previous frame // Raise q_low - q_low = *q < q_high ? *q + 1 : q_high; + q_low = q < q_high ? q + 1 : q_high; // Adjust Q - *q = ((*q) * low_err_target) / kf_err; - *q = MIN((*q), (q_high + q_low + 1) >> 1); + q = (q * low_err_target) / kf_err; + q = MIN(q, (q_high + q_low + 1) >> 1); } // Clamp Q to upper and lower limits: - *q = clamp(*q, q_low, q_high); + q = clamp(q, q_low, q_high); - loop = *q != last_q; + loop = q != last_q; } else if (recode_loop_test( cpi, frame_over_shoot_limit, frame_under_shoot_limit, - *q, MAX(q_high, top_index), bottom_index)) { + q, MAX(q_high, top_index), bottom_index)) { // Is the projected frame size out of range and are we allowed // to attempt to recode. - int last_q = *q; + int last_q = q; int retries = 0; // Frame size out of permitted range: @@ -2935,23 +2945,23 @@ static void encode_with_recode_loop(VP9_COMP *cpi, q_high = cpi->rc.worst_quality; // Raise Qlow as to at least the current value - q_low = *q < q_high ? *q + 1 : q_high; + q_low = q < q_high ? q + 1 : q_high; if (undershoot_seen || loop_count > 1) { // Update rate_correction_factor unless vp9_rc_update_rate_correction_factors(cpi, 1); - *q = (q_high + q_low + 1) / 2; + q = (q_high + q_low + 1) / 2; } else { // Update rate_correction_factor unless vp9_rc_update_rate_correction_factors(cpi, 0); - *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, + q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, bottom_index, MAX(q_high, top_index)); - while (*q < q_low && retries < 10) { + while (q < q_low && retries < 10) { vp9_rc_update_rate_correction_factors(cpi, 0); - *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, + q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, bottom_index, MAX(q_high, top_index)); retries++; } @@ -2960,27 +2970,27 @@ static void encode_with_recode_loop(VP9_COMP *cpi, overshoot_seen = 1; } else { // Frame is too small - q_high = *q > q_low ? *q - 1 : q_low; + q_high = q > q_low ? q - 1 : q_low; if (overshoot_seen || loop_count > 1) { vp9_rc_update_rate_correction_factors(cpi, 1); - *q = (q_high + q_low) / 2; + q = (q_high + q_low) / 2; } else { vp9_rc_update_rate_correction_factors(cpi, 0); - *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, + q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, bottom_index, top_index); // Special case reset for qlow for constrained quality. // This should only trigger where there is very substantial // undershoot on a frame and the auto cq level is above // the user passsed in value. if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && - *q < q_low) { - q_low = *q; + q < q_low) { + q_low = q; } - while (*q > q_high && retries < 10) { + while (q > q_high && retries < 10) { vp9_rc_update_rate_correction_factors(cpi, 0); - *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, + q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, bottom_index, top_index); retries++; } @@ -2990,9 +3000,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, } // Clamp Q to upper and lower limits: - *q = clamp(*q, q_low, q_high); + q = clamp(q, q_low, q_high); - loop = *q != last_q; + loop = q != last_q; } else { loop = 0; } @@ -3210,9 +3220,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } if (cpi->sf.recode_loop == DISALLOW_RECODE) { - encode_without_recode_loop(cpi, size, dest, &q); + encode_without_recode_loop(cpi, size, dest, q); } else { - encode_with_recode_loop(cpi, size, dest, &q, bottom_index, top_index); + encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index); } // Special case code to reduce pulsing when key frames are forced at a @@ -3272,10 +3282,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } } -#ifdef ENTROPY_STATS - vp9_update_mode_context_stats(cpi); -#endif - #if 0 output_frame_level_debug_stats(cpi); #endif diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 5fd8e5a54..90ed606f6 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -45,7 +45,7 @@ extern "C" { #else #define MIN_GF_INTERVAL 4 #endif -#define DEFAULT_GF_INTERVAL 11 +#define DEFAULT_GF_INTERVAL 10 #define DEFAULT_KF_BOOST 2000 #define DEFAULT_GF_BOOST 2000 @@ -401,6 +401,7 @@ typedef struct { int64_t optimal_buffer_level; int64_t maximum_buffer_size; double framerate; + int avg_frame_size; } LAYER_CONTEXT; typedef struct VP9_COMP { @@ -691,8 +692,6 @@ void vp9_encode_frame(VP9_COMP *cpi); void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size); -void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x); - void vp9_set_speed_features(VP9_COMP *cpi); int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index b2581072e..b3e9f4538 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -215,7 +215,7 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { rc->av_per_frame_bandwidth >> 5); if (target < min_frame_target) target = min_frame_target; - if (cpi->refresh_golden_frame && rc->source_alt_ref_active) { + if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) { // If there is an active ARF at this location use the minimum // bits on this frame even if it is a constructed arf. // The active maximum quantizer insures that an appropriate @@ -487,8 +487,7 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi, double q_adj_factor = 1.0; double q_val; - // Baseline value derived from cpi->active_worst_quality and kf boost - active_best_quality = get_active_quality(active_worst_quality, + active_best_quality = get_active_quality(rc->avg_frame_qindex[KEY_FRAME], rc->kf_boost, kf_low, kf_high, kf_low_motion_minq, @@ -521,7 +520,8 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi, rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) { q = rc->avg_frame_qindex[INTER_FRAME]; } else { - q = active_worst_quality; + q = (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? + active_worst_quality : rc->avg_frame_qindex[KEY_FRAME]; } // For constrained quality dont allow Q less than the cq level if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) { @@ -565,10 +565,24 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi, active_best_quality = cpi->cq_target_quality; } else { // Use the lower of active_worst_quality and recent/average Q. - if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) - active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; - else - active_best_quality = inter_minq[active_worst_quality]; + if (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + if (cm->current_video_frame > 1) { + if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) + active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; + else + active_best_quality = inter_minq[active_worst_quality]; + } else { + if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality) + active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]]; + else + active_best_quality = inter_minq[active_worst_quality]; + } + } else { + if (cm->current_video_frame > 1) + active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; + else + active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]]; + } // For the constrained quality mode we don't want // q to fall below the cq level. if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) && @@ -973,7 +987,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO( 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && + !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) { rc->last_q[2] = cm->base_qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); @@ -1056,7 +1071,7 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) { #define USE_ALTREF_FOR_ONE_PASS 1 static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { - static const int af_ratio = 5; + static const int af_ratio = 10; const RATE_CONTROL *rc = &cpi->rc; int target; #if USE_ALTREF_FOR_ONE_PASS @@ -1073,55 +1088,71 @@ static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { } static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { - static const int kf_ratio = 12; + static const int kf_ratio = 25; const RATE_CONTROL *rc = &cpi->rc; int target = rc->av_per_frame_bandwidth * kf_ratio; return vp9_rc_clamp_iframe_target_size(cpi, target); } +static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) { + int active_worst_quality; + if (cpi->common.frame_type == KEY_FRAME) { + if (cpi->common.current_video_frame == 0) { + active_worst_quality = cpi->rc.worst_quality; + } else { + // Choose active worst quality twice as large as the last q. + active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; + } + } else if (!cpi->rc.is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + if (cpi->common.current_video_frame == 1) { + active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 5 / 4; + } else { + // Choose active worst quality twice as large as the last q. + active_worst_quality = cpi->rc.last_q[INTER_FRAME]; + } + } else { + if (cpi->common.current_video_frame == 1) { + active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; + } else { + // Choose active worst quality twice as large as the last q. + active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2; + } + } + if (active_worst_quality > cpi->rc.worst_quality) + active_worst_quality = cpi->rc.worst_quality; + return active_worst_quality; +} + void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; int target; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || cm->frame_flags & FRAMEFLAGS_KEY || - cpi->rc.frames_to_key == 0 || + rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; - cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && - cpi->rc.frames_to_key == 0; - cpi->rc.frames_to_key = cpi->key_frame_frequency; - cpi->rc.kf_boost = DEFAULT_KF_BOOST; - cpi->rc.source_alt_ref_active = 0; - if (cm->current_video_frame == 0) { - cpi->rc.active_worst_quality = cpi->rc.worst_quality; - } else { - // Choose active worst quality twice as large as the last q. - cpi->rc.active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; - if (cpi->rc.active_worst_quality > cpi->rc.worst_quality) - cpi->rc.active_worst_quality = cpi->rc.worst_quality; - } + rc->this_key_frame_forced = cm->current_video_frame != 0 && + rc->frames_to_key == 0; + rc->frames_to_key = cpi->key_frame_frequency; + rc->kf_boost = DEFAULT_KF_BOOST; + rc->source_alt_ref_active = 0; } else { cm->frame_type = INTER_FRAME; - if (cm->current_video_frame == 1) { - cpi->rc.active_worst_quality = cpi->rc.worst_quality; - } else { - // Choose active worst quality twice as large as the last q. - cpi->rc.active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2; - if (cpi->rc.active_worst_quality > cpi->rc.worst_quality) - cpi->rc.active_worst_quality = cpi->rc.worst_quality; - } } - if (cpi->rc.frames_till_gf_update_due == 0) { - cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; - cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval; + if (rc->frames_till_gf_update_due == 0) { + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; + rc->frames_till_gf_update_due = rc->baseline_gf_interval; // NOTE: frames_till_gf_update_due must be <= frames_to_key. - if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key) - cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key; + if (rc->frames_till_gf_update_due > rc->frames_to_key) + rc->frames_till_gf_update_due = rc->frames_to_key; cpi->refresh_golden_frame = 1; - cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS; - cpi->rc.gfu_boost = DEFAULT_GF_BOOST; + rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS; + rc->gfu_boost = DEFAULT_GF_BOOST; } + cpi->rc.active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi); if (cm->frame_type == KEY_FRAME) target = calc_iframe_target_size_one_pass_vbr(cpi); else @@ -1140,13 +1171,15 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { const RATE_CONTROL *rc = &cpi->rc; int active_worst_quality = rc->active_worst_quality; // Maximum limit for down adjustment, ~20%. - int max_adjustment_down = active_worst_quality / 5; // Buffer level below which we push active_worst to worst_quality. int critical_level = oxcf->optimal_buffer_level >> 2; int adjustment = 0; int buff_lvl_step = 0; + if (cpi->common.frame_type == KEY_FRAME) + return rc->worst_quality; if (rc->buffer_level > oxcf->optimal_buffer_level) { // Adjust down. + int max_adjustment_down = active_worst_quality / 5; if (max_adjustment_down) { buff_lvl_step = (int)((oxcf->maximum_buffer_size - oxcf->optimal_buffer_level) / max_adjustment_down); @@ -1176,11 +1209,20 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; - int target = rc->av_per_frame_bandwidth; - const int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, - FRAME_OVERHEAD_BITS); const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100; + int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, + FRAME_OVERHEAD_BITS); + int target = rc->av_per_frame_bandwidth; + if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + // Note that for layers, av_per_frame_bandwidth is the cumulative + // per-frame-bandwidth. For the target size of this frame, use the + // layer average frame size (i.e., non-cumulative per-frame-bw). + int current_temporal_layer = cpi->svc.temporal_layer_id; + const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer]; + target = lc->avg_frame_size; + min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); + } if (diff > 0) { // Lower the target bandwidth for this frame. const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct); @@ -1190,26 +1232,23 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const int pct_high = MIN(-diff / one_pct_bits, oxcf->over_shoot_pct); target += (target * pct_high) / 200; } - if (target < min_frame_target) - target = min_frame_target; - return target; + return MAX(min_frame_target, target); } static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { - int target; const RATE_CONTROL *rc = &cpi->rc; + if (cpi->common.current_video_frame == 0) { - target = cpi->oxcf.starting_buffer_level / 2; + return cpi->oxcf.starting_buffer_level / 2; } else { - int initial_boost = 32; + const int initial_boost = 32; int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16)); if (rc->frames_since_key < cpi->output_framerate / 2) { kf_boost = (int)(kf_boost * rc->frames_since_key / (cpi->output_framerate / 2)); } - target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4; + return ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4; } - return target; } void vp9_rc_get_svc_params(VP9_COMP *cpi) { @@ -1223,16 +1262,15 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { cpi->rc.source_alt_ref_active = 0; if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { target = calc_iframe_target_size_one_pass_cbr(cpi); - cpi->rc.active_worst_quality = cpi->rc.worst_quality; } } else { cm->frame_type = INTER_FRAME; if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { target = calc_pframe_target_size_one_pass_cbr(cpi); - cpi->rc.active_worst_quality = - calc_active_worst_quality_one_pass_cbr(cpi); } } + cpi->rc.active_worst_quality = + calc_active_worst_quality_one_pass_cbr(cpi); vp9_rc_set_frame_target(cpi, target); cpi->rc.frames_till_gf_update_due = INT_MAX; cpi->rc.baseline_gf_interval = INT_MAX; @@ -1240,27 +1278,27 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; int target; if ((cm->current_video_frame == 0 || cm->frame_flags & FRAMEFLAGS_KEY || - cpi->rc.frames_to_key == 0 || + rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; - cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && - cpi->rc.frames_to_key == 0; - cpi->rc.frames_to_key = cpi->key_frame_frequency; - cpi->rc.kf_boost = DEFAULT_KF_BOOST; - cpi->rc.source_alt_ref_active = 0; + rc->this_key_frame_forced = cm->current_video_frame != 0 && + rc->frames_to_key == 0; + rc->frames_to_key = cpi->key_frame_frequency; + rc->kf_boost = DEFAULT_KF_BOOST; + rc->source_alt_ref_active = 0; target = calc_iframe_target_size_one_pass_cbr(cpi); - cpi->rc.active_worst_quality = cpi->rc.worst_quality; } else { cm->frame_type = INTER_FRAME; target = calc_pframe_target_size_one_pass_cbr(cpi); - cpi->rc.active_worst_quality = - calc_active_worst_quality_one_pass_cbr(cpi); } + cpi->rc.active_worst_quality = + calc_active_worst_quality_one_pass_cbr(cpi); vp9_rc_set_frame_target(cpi, target); // Don't use gf_update by default in CBR mode. - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; + rc->frames_till_gf_update_due = INT_MAX; + rc->baseline_gf_interval = INT_MAX; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 7b17b8582..fcfab2a41 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1645,14 +1645,15 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi_buf, int filter_idx, int_mv seg_mvs[4][MAX_REF_FRAMES], int mi_row, int mi_col) { - int i, br = 0, idx, idy; + int k, br = 0, idx, idy; int64_t bd = 0, block_sse = 0; MB_PREDICTION_MODE this_mode; + MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MODE_INFO *mi = x->e_mbd.mi_8x8[0]; + MODE_INFO *mi = xd->mi_8x8[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; const int label_count = 4; int64_t this_segment_rd = 0; int label_mv_thresh; @@ -1660,7 +1661,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize = mbmi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - vp9_variance_fn_ptr_t *v_fn_ptr; + vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize]; ENTROPY_CONTEXT t_above[2], t_left[2]; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; int mode_idx; @@ -1670,8 +1671,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(t_above, pd->above_context, sizeof(t_above)); vpx_memcpy(t_left, pd->left_context, sizeof(t_left)); - v_fn_ptr = &cpi->fn_ptr[bsize]; - // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on // segments. setting this to 1 would make mv thresh @@ -1687,20 +1686,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; MB_PREDICTION_MODE mode_selected = ZEROMV; int64_t best_rd = INT64_MAX; - i = idy * 2 + idx; - - frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0; - vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile, - i, 0, mi_row, mi_col, - &frame_mv[NEARESTMV][mbmi->ref_frame[0]], - &frame_mv[NEARMV][mbmi->ref_frame[0]]); - if (has_second_rf) { - frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0; - vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile, - i, 1, mi_row, mi_col, - &frame_mv[NEARESTMV][mbmi->ref_frame[1]], - &frame_mv[NEARMV][mbmi->ref_frame[1]]); + const int i = idy * 2 + idx; + int ref; + + for (ref = 0; ref < 1 + has_second_rf; ++ref) { + const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; + frame_mv[ZEROMV][frame].as_int = 0; + vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col, + &frame_mv[NEARESTMV][frame], + &frame_mv[NEARMV][frame]); } + // search for the best motion vector on this segment for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { const struct buf_2d orig_src = x->plane[0].src; @@ -2042,8 +2038,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, bsi->sse = block_sse; // update the coding decisions - for (i = 0; i < 4; ++i) - bsi->modes[i] = mi->bmi[i].as_mode; + for (k = 0; k < 4; ++k) + bsi->modes[k] = mi->bmi[k].as_mode; } static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm index db306603b..48ccef8cc 100644 --- a/vp9/encoder/x86/vp9_quantize_ssse3.asm +++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm @@ -188,7 +188,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pmaxsw m8, m7 pshuflw m7, m8, 0x1 pmaxsw m8, m7 - pextrw [r2], m8, 0 + pextrw r6, m8, 0 + mov [r2], r6 RET ; skip-block, i.e. just write all zeroes @@ -214,5 +215,5 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ %endmacro INIT_XMM ssse3 -QUANTIZE_FN b, 6 +QUANTIZE_FN b, 7 QUANTIZE_FN b_32x32, 7 |