diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/arm/neon/vp9_quantize_neon.c | 43 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 36 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 61 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 113 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 34 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 26 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 9 |
10 files changed, 192 insertions, 145 deletions
diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c index 2d5ec79b3..8c13d0da6 100644 --- a/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c @@ -28,7 +28,6 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { - int i; // TODO(jingning) Decide the need of these arguments after the // quantization process is completed. (void)zbin_ptr; @@ -39,7 +38,7 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, if (!skip_block) { // Quantization pass: All coefficients with index >= zero_flag are // skippable. Note: zero_flag can be zero. - + int i; const int16x8_t v_zero = vdupq_n_s16(0); const int16x8_t v_one = vdupq_n_s16(1); int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1); @@ -50,13 +49,37 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, v_round = vsetq_lane_s16(round_ptr[0], v_round, 0); v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0); v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0); - - for (i = 0; i < count; i += 8) { + // process dc and the first seven ac coeffs + { + const int16x8_t v_iscan = vld1q_s16(&iscan[0]); + const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]); + const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); + const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); + const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), + vget_low_s16(v_quant)); + const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp), + vget_high_s16(v_quant)); + const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), + vshrn_n_s32(v_tmp_hi, 16)); + const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); + const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one); + const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); + const int16x8_t v_qcoeff_a = 
veorq_s16(v_tmp2, v_coeff_sign); + const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); + const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); + v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); + vst1q_s16(&qcoeff_ptr[0], v_qcoeff); + vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff); + v_round = vmovq_n_s16(round_ptr[1]); + v_quant = vmovq_n_s16(quant_ptr[1]); + v_dequant = vmovq_n_s16(dequant_ptr[1]); + } + // now process the rest of the ac coeffs + for (i = 8; i < count; i += 8) { const int16x8_t v_iscan = vld1q_s16(&iscan[i]); const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_abs_coeff = vabsq_s16(v_coeff); - const int16x8_t v_tmp = vqaddq_s16(v_abs_coeff, v_round); + const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp), @@ -65,19 +88,13 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, vshrn_n_s32(v_tmp_hi, 16)); const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one); - const int16x8_t v_nz_iscan = - vandq_s16(vmvnq_s16(vreinterpretq_s16_u16(v_nz_mask)), v_iscan_plus1); + const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); - v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); - vst1q_s16(&qcoeff_ptr[i], v_qcoeff); vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff); - v_round = vmovq_n_s16(round_ptr[1]); - v_quant = vmovq_n_s16(quant_ptr[1]); - v_dequant = vmovq_n_s16(dequant_ptr[1]); } { const int16x4_t v_eobmax_3210 = diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 60f20c12d..445608a3d 100644 --- 
a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -541,6 +541,20 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { set_tile_limits(cpi); } +static void set_rc_buffer_sizes(RATE_CONTROL *rc, + const VP9EncoderConfig *oxcf) { + const int64_t bandwidth = oxcf->target_bandwidth; + const int64_t starting = oxcf->starting_buffer_level_ms; + const int64_t optimal = oxcf->optimal_buffer_level_ms; + const int64_t maximum = oxcf->maximum_buffer_size_ms; + + rc->starting_buffer_level = starting * bandwidth / 1000; + rc->optimal_buffer_level = (optimal == 0) ? bandwidth / 8 + : optimal * bandwidth / 1000; + rc->maximum_buffer_size = (maximum == 0) ? bandwidth / 8 + : maximum * bandwidth / 1000; +} + void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -574,28 +588,8 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { } cpi->encode_breakout = cpi->oxcf.encode_breakout; - // local file playback mode == really big buffer - if (cpi->oxcf.rc_mode == VPX_VBR) { - cpi->oxcf.starting_buffer_level_ms = 60000; - cpi->oxcf.optimal_buffer_level_ms = 60000; - cpi->oxcf.maximum_buffer_size_ms = 240000; - } - - rc->starting_buffer_level = cpi->oxcf.starting_buffer_level_ms * - cpi->oxcf.target_bandwidth / 1000; - - // Set or reset optimal and maximum buffer levels. 
- if (cpi->oxcf.optimal_buffer_level_ms == 0) - rc->optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; - else - rc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level_ms * - cpi->oxcf.target_bandwidth / 1000; + set_rc_buffer_sizes(rc, &cpi->oxcf); - if (cpi->oxcf.maximum_buffer_size_ms == 0) - rc->maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; - else - rc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size_ms * - cpi->oxcf.target_bandwidth / 1000; // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 284ae9dc9..ec8ac82bb 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -495,14 +495,6 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( .buf; } -// Intra only frames, golden frames (except alt ref overlays) and -// alt ref frames tend to be coded at a higher than ambient quality -static INLINE int frame_is_boosted(const VP9_COMP *cpi) { - return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) || - vp9_is_upper_layer_key_frame(cpi); -} - static INLINE int get_token_alloc(int mb_rows, int mb_cols) { // TODO(JBB): double check we can't exceed this token count if we have a // 32x32 transform crossing a boundary at a multiple of 16. diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 81ce5ea46..94bbe9c69 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -432,6 +432,8 @@ void vp9_first_pass(VP9_COMP *cpi) { TWO_PASS *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; + LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ? 
+ &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0; #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { @@ -444,15 +446,14 @@ void vp9_first_pass(VP9_COMP *cpi) { set_first_pass_params(cpi); vp9_set_quantizer(cm, find_fp_qindex()); - if (is_spatial_svc(cpi)) { + if (lc != NULL) { MV_REFERENCE_FRAME ref_frame = LAST_FRAME; const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; - twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + twopass = &lc->twopass; if (cpi->common.current_video_frame == 0) { cpi->ref_frame_flags = 0; } else { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; if (lc->current_video_frame_in_layer == 0) cpi->ref_frame_flags = VP9_GOLD_FLAG; else @@ -613,7 +614,7 @@ void vp9_first_pass(VP9_COMP *cpi) { &unscaled_last_source_buf_2d); // TODO(pengchong): Replace the hard-coded threshold - if (raw_motion_error > 25 || is_spatial_svc(cpi)) { + if (raw_motion_error > 25 || lc != NULL) { // Test last reference frame using the previous best mv as the // starting point (best reference) for the search. first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv, @@ -895,7 +896,7 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_extend_frame_borders(new_yv12); - if (is_spatial_svc(cpi)) { + if (lc != NULL) { vp9_update_reference_frames(cpi); } else { // Swap frame pointers so last frame refers to the frame we just compressed. @@ -1081,8 +1082,7 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm, // This function gives an estimate of how badly we believe the prediction // quality is decaying from frame to frame. 
-static double get_zero_motion_factor(const VP9_COMMON *cm, - const FIRSTPASS_STATS *frame) { +static double get_zero_motion_factor(const FIRSTPASS_STATS *frame) { const double sr_ratio = frame->coded_error / DOUBLE_DIVIDE_CHECK(frame->sr_coded_error); const double zero_motion_pct = frame->pcnt_inter - @@ -1095,12 +1095,10 @@ static double get_zero_motion_factor(const VP9_COMMON *cm, // Function to test for a condition where a complex transition is followed // by a static section. For example in slide shows where there is a fade // between slides. This is to help with more optimal kf and gf positioning. -static int detect_transition_to_still(TWO_PASS *twopass, +static int detect_transition_to_still(const TWO_PASS *twopass, int frame_interval, int still_interval, double loop_decay_rate, double last_decay_rate) { - int trans_to_still = 0; - // Break clause to detect very still sections after motion // For example a static image after a fade or other transition // instead of a clean scene cut. @@ -1108,26 +1106,22 @@ static int detect_transition_to_still(TWO_PASS *twopass, loop_decay_rate >= 0.999 && last_decay_rate < 0.9) { int j; - const FIRSTPASS_STATS *position = twopass->stats_in; - FIRSTPASS_STATS tmp_next_frame; // Look ahead a few frames to see if static condition persists... for (j = 0; j < still_interval; ++j) { - if (EOF == input_stats(twopass, &tmp_next_frame)) + const FIRSTPASS_STATS *stats = &twopass->stats_in[j]; + if (stats >= twopass->stats_in_end) break; - if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999) + if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break; } - reset_fpf_position(twopass, position); - // Only if it does do we signal a transition to still. 
- if (j == still_interval) - trans_to_still = 1; + return j == still_interval; } - return trans_to_still; + return 0; } // This function detects a flash through the high relative pcnt_second_ref @@ -1555,8 +1549,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { vp9_clear_system_state(); vp9_zero(next_frame); - gf_group_bits = 0; - // Load stats for the current frame. mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame); @@ -1616,9 +1608,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { decay_accumulator = decay_accumulator * loop_decay_rate; // Monitor for static sections. - zero_motion_accumulator = - MIN(zero_motion_accumulator, - get_zero_motion_factor(&cpi->common, &next_frame)); + zero_motion_accumulator = MIN(zero_motion_accumulator, + get_zero_motion_factor(&next_frame)); // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. @@ -1989,9 +1980,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { break; // Monitor for static sections. - zero_motion_accumulator = - MIN(zero_motion_accumulator, - get_zero_motion_factor(&cpi->common, &next_frame)); + zero_motion_accumulator =MIN(zero_motion_accumulator, + get_zero_motion_factor(&next_frame)); // For the first few frames collect data to decide kf boost. if (i <= (rc->max_gf_interval * 2)) { @@ -2127,10 +2117,10 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { FIRSTPASS_STATS this_frame_copy; int target_rate; - LAYER_CONTEXT *lc = NULL; + LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ? 
+ &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : 0; - if (is_spatial_svc(cpi)) { - lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + if (lc != NULL) { frames_left = (int)(twopass->total_stats.count - lc->current_video_frame_in_layer); } else { @@ -2157,7 +2147,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { vp9_rc_set_frame_target(cpi, target_rate); cm->frame_type = INTER_FRAME; - if (is_spatial_svc(cpi)) { + if (lc != NULL) { if (cpi->svc.spatial_layer_id == 0) { lc->is_key_frame = 0; } else { @@ -2173,7 +2163,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { vp9_clear_system_state(); - if (is_spatial_svc(cpi) && twopass->kf_intra_err_min == 0) { + if (lc != NULL && twopass->kf_intra_err_min == 0) { twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; } @@ -2181,8 +2171,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (cpi->oxcf.rc_mode == VPX_Q) { twopass->active_worst_quality = cpi->oxcf.cq_level; } else if (cm->current_video_frame == 0 || - (is_spatial_svc(cpi) && - lc->current_video_frame_in_layer == 0)) { + (lc != NULL && lc->current_video_frame_in_layer == 0)) { // Special case code for first frame. 
const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -2208,7 +2197,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { cm->frame_type = INTER_FRAME; } - if (is_spatial_svc(cpi)) { + if (lc != NULL) { if (cpi->svc.spatial_layer_id == 0) { lc->is_key_frame = (cm->frame_type == KEY_FRAME); if (lc->is_key_frame) @@ -2239,7 +2228,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } rc->frames_till_gf_update_due = rc->baseline_gf_interval; - if (!is_spatial_svc(cpi)) + if (lc == NULL) cpi->refresh_golden_frame = 1; } diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 6115f5a0f..e5469c831 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -17,6 +17,7 @@ #include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_reconinter.h" @@ -343,6 +344,52 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x, } } +struct estimate_block_intra_args { + VP9_COMP *cpi; + MACROBLOCK *x; + PREDICTION_MODE mode; + int rate; + int64_t dist; +}; + +static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { + struct estimate_block_intra_args* const args = arg; + VP9_COMP *const cpi = args->cpi; + MACROBLOCK *const x = args->x; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = &x->plane[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; + const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size]; + uint8_t *const src_buf_base = p->src.buf; + uint8_t *const dst_buf_base = pd->dst.buf; + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + int i, j; + int rate; + int64_t dist; + unsigned int var_y, sse_y; + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); + assert(plane == 0); + (void) plane; + + p->src.buf = &src_buf_base[4 * (j * src_stride + i)]; + pd->dst.buf = 
&dst_buf_base[4 * (j * dst_stride + i)]; + // Use source buffer as an approximation for the fully reconstructed buffer. + vp9_predict_intra_block(xd, block >> (2 * tx_size), + b_width_log2(plane_bsize), + tx_size, args->mode, + p->src.buf, src_stride, + pd->dst.buf, dst_stride, + i, j, 0); + // This procedure assumes zero offset from p->src.buf and pd->dst.buf. + model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y); + p->src.buf = src_buf_base; + pd->dst.buf = dst_buf_base; + args->rate += rate; + args->dist += dist; +} + static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = { {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV}, {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG}, @@ -360,7 +407,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; PREDICTION_MODE this_mode, best_mode = ZEROMV; MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME; @@ -397,9 +443,9 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (((mi_row + mi_col) >> bsl) + get_chessboard_index(cm->current_video_frame)) & 0x1 : 0; int const_motion[MAX_REF_FRAMES] = { 0 }; - int bh = num_4x4_blocks_high_lookup[bsize] << 2; - int bw = num_4x4_blocks_wide_lookup[bsize] << 2; - int pixels_in_block = bh * bw; + const int bh = num_4x4_blocks_high_lookup[bsize] << 2; + const int bw = num_4x4_blocks_wide_lookup[bsize] << 2; + const int pixels_in_block = bh * bw; // For speed 6, the result of interp filter is reused later in actual encoding // process. // tmp[3] points to dst buffer, and the other 3 point to allocated buffers. @@ -670,20 +716,11 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // threshold. 
if (!x->skip && best_rd > inter_mode_thresh && bsize <= cpi->sf.max_intra_bsize) { - int i, j; - const int width = num_4x4_blocks_wide_lookup[bsize]; - const int height = num_4x4_blocks_high_lookup[bsize]; + struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 }; - int rate2 = 0; - int64_t dist2 = 0; - const int dst_stride = cpi->sf.reuse_inter_pred_sby ? bw : pd->dst.stride; - const int src_stride = p->src.stride; - int block_idx = 0; - - TX_SIZE tmp_tx_size = MIN(max_txsize_lookup[bsize], - tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - const BLOCK_SIZE bsize_tx = txsize_to_bsize[tmp_tx_size]; - const int step = 1 << tmp_tx_size; + const TX_SIZE intra_tx_size = + MIN(max_txsize_lookup[bsize], + tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); if (cpi->sf.reuse_inter_pred_sby) { pd->dst.buf = tmp[0].data; @@ -691,44 +728,26 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) { - uint8_t *const src_buf_base = p->src.buf; - uint8_t *const dst_buf_base = pd->dst.buf; - for (j = 0; j < height; j += step) { - for (i = 0; i < width; i += step) { - p->src.buf = &src_buf_base[4 * (j * src_stride + i)]; - pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)]; - // Use source buffer as an approximation for the fully reconstructed - // buffer - vp9_predict_intra_block(xd, block_idx, b_width_log2(bsize), - tmp_tx_size, this_mode, - p->src.buf, src_stride, - pd->dst.buf, dst_stride, - i, j, 0); - model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y); - rate2 += rate; - dist2 += dist; - ++block_idx; - } - } - p->src.buf = src_buf_base; - pd->dst.buf = dst_buf_base; - - rate = rate2; - dist = dist2; - + const TX_SIZE saved_tx_size = mbmi->tx_size; + args.mode = this_mode; + args.rate = 0; + args.dist = 0; + mbmi->tx_size = intra_tx_size; + vp9_foreach_transformed_block_in_plane(xd, bsize, 0, + estimate_block_intra, &args); + mbmi->tx_size = saved_tx_size; + rate = args.rate; 
+ dist = args.dist; rate += cpi->mbmode_cost[this_mode]; rate += intra_cost_penalty; this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); - if (cpi->sf.reuse_inter_pred_sby) - pd->dst = orig_dst; - if (this_rd + intra_mode_cost < best_rd) { best_rd = this_rd; *returnrate = rate; *returndistortion = dist; mbmi->mode = this_mode; - mbmi->tx_size = tmp_tx_size; + mbmi->tx_size = intra_tx_size; mbmi->ref_frame[0] = INTRA_FRAME; mbmi->uv_mode = this_mode; mbmi->mv[0].as_int = INVALID_MV; @@ -736,6 +755,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->skip_txfm[0] = skip_txfm; } } + if (cpi->sf.reuse_inter_pred_sby) + pd->dst = orig_dst; } return INT64_MAX; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 9da2adec4..b926a58f4 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -646,7 +646,6 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, int q; if (frame_is_intra_only(cm)) { - active_best_quality = rc->best_quality; // Handle the special case for key frames forced when we have reached // the maximum key frame interval. 
Here force the Q to a range diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 517674e5f..cfda964ce 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -490,24 +490,24 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}, {INT64_MAX, INT64_MAX}}; - TX_SIZE n, m; + int n, m; int s0, s1; const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; int64_t best_rd = INT64_MAX; - TX_SIZE best_tx = TX_4X4; + TX_SIZE best_tx = max_tx_size; const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs); assert(skip_prob > 0); s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); - for (n = TX_4X4; n <= max_tx_size; n++) { + for (n = max_tx_size; n >= 0; n--) { txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0, bs, n, cpi->sf.use_fast_coef_costing); r[n][1] = r[n][0]; if (r[n][0] < INT_MAX) { - for (m = 0; m <= n - (n == max_tx_size); m++) { + for (m = 0; m <= n - (n == (int) max_tx_size); m++) { if (m == n) r[n][1] += vp9_cost_zero(tx_probs[m]); else @@ -523,6 +523,13 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } + // Early termination in transform size search. 
+ if (cpi->sf.tx_size_search_breakout && + (rd[n][1] == INT64_MAX || + (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) || + s[n] == 1)) + break; + if (rd[n][1] < best_rd) { best_tx = n; best_rd = rd[n][1]; @@ -2632,6 +2639,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t total_sse = INT64_MAX; int early_term = 0; + this_mode = vp9_mode_order[mode_index].mode; + ref_frame = vp9_mode_order[mode_index].ref_frame[0]; + if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode))) + continue; + second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; + // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. if (mode_index == mode_skip_start && best_mode_index >= 0) { @@ -2653,6 +2666,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, break; } } + + if (cpi->sf.alt_ref_search_fp && cpi->rc.is_src_frame_alt_ref) { + mode_skip_mask = 0; + if (!(ref_frame == ALTREF_FRAME && second_ref_frame == NONE)) + continue; + } + if (mode_skip_mask & (1 << mode_index)) continue; @@ -2661,12 +2681,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rd_thresh_freq_fact[mode_index])) continue; - this_mode = vp9_mode_order[mode_index].mode; - ref_frame = vp9_mode_order[mode_index].ref_frame[0]; - if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode))) - continue; - second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; - if (cpi->sf.motion_field_mode_search) { const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize], tile->mi_col_end - mi_col); diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 67b6e269e..57835ec3d 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -50,8 +50,20 @@ enum { (1 << THR_GOLD) }; +// Intra only frames, golden frames (except alt ref overlays) and +// alt ref frames tend to be coded at a higher than ambient quality +static int 
frame_is_boosted(const VP9_COMP *cpi) { + return frame_is_intra_only(&cpi->common) || + cpi->refresh_alt_ref_frame || + (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) || + vp9_is_upper_layer_key_frame(cpi); +} + + static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { + const int boosted = frame_is_boosted(cpi); + sf->adaptive_rd_thresh = 1; sf->recode_loop = (speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW; sf->allow_skip_recode = 1; @@ -59,8 +71,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, if (speed >= 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD - : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT @@ -80,9 +90,14 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; + + sf->tx_size_search_breakout = 1; } if (speed >= 2) { + sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD + : USE_LARGESTALL; + if (MIN(cm->width, cm->height) >= 720) { sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->last_partitioning_redo_frequency = 3; @@ -117,9 +132,10 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; } sf->adaptive_pred_interp_filter = 0; - sf->cb_partition_search = frame_is_boosted(cpi) ? 0 : 1; + sf->cb_partition_search = !boosted; sf->cb_pred_filter_search = 1; - sf->motion_field_mode_search = frame_is_boosted(cpi) ? 
0 : 1; + sf->alt_ref_search_fp = 1; + sf->motion_field_mode_search = !boosted; sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->last_partitioning_redo_frequency = 3; sf->recode_loop = ALLOW_RECODE_KFMAXBW; @@ -347,6 +363,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->cb_pred_filter_search = 0; sf->cb_partition_search = 0; sf->motion_field_mode_search = 0; + sf->alt_ref_search_fp = 0; sf->use_quant_fp = 0; sf->reference_masking = 0; sf->partition_search_type = SEARCH_PARTITION; @@ -389,6 +406,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { // Recode loop tolerence %. sf->recode_tolerance = 25; sf->default_interp_filter = SWITCHABLE; + sf->tx_size_search_breakout = 0; if (oxcf->mode == REALTIME) { set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content); diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 8edcb1d72..bad956da5 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -291,6 +291,8 @@ typedef struct SPEED_FEATURES { int motion_field_mode_search; + int alt_ref_search_fp; + // Fast quantization process path int use_quant_fp; @@ -374,6 +376,10 @@ typedef struct SPEED_FEATURES { // default interp filter choice INTERP_FILTER default_interp_filter; + + // Early termination in transform size search, which only applies while + // tx_size_search_method is USE_FULL_RD. + int tx_size_search_breakout; } SPEED_FEATURES; struct VP9_COMP; diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index fb52d1ab7..1d9bdd869 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -106,12 +106,9 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, } bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; // Update buffer-related quantities. 
- lrc->starting_buffer_level = - (int64_t)(rc->starting_buffer_level * bitrate_alloc); - lrc->optimal_buffer_level = - (int64_t)(rc->optimal_buffer_level * bitrate_alloc); - lrc->maximum_buffer_size = - (int64_t)(rc->maximum_buffer_size * bitrate_alloc); + lrc->starting_buffer_level = rc->starting_buffer_level * bitrate_alloc; + lrc->optimal_buffer_level = rc->optimal_buffer_level * bitrate_alloc; + lrc->maximum_buffer_size = rc->maximum_buffer_size * bitrate_alloc; lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); // Update framerate-related quantities. |