diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_aq_complexity.c | 16 | ||||
-rw-r--r-- | vp9/encoder/vp9_block.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 45 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 79 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 44 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 111 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 52 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.h | 65 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 63 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 39 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.h | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.c | 13 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm | 14 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm | 4 |
20 files changed, 263 insertions, 323 deletions
diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c index 47ad8d8cc..0d6b41d15 100644 --- a/vp9/encoder/vp9_aq_complexity.c +++ b/vp9/encoder/vp9_aq_complexity.c @@ -47,11 +47,21 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { // Use some of the segments for in frame Q adjustment. for (segment = 1; segment < 2; segment++) { - const int qindex_delta = + int qindex_delta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, in_frame_q_adj_ratio[segment]); - vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + + // For AQ mode 2, we dont allow Q0 in a segment if the base Q is not 0. + // Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment Q delta + // is sometimes applied without going back around the rd loop. + // This could lead to an illegal combination of partition size and q. + if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { + qindex_delta = -cm->base_qindex + 1; + } + if ((cm->base_qindex + qindex_delta) > 0) { + vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + } } } } diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index c406860a0..c3cd93b78 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -109,6 +109,7 @@ struct macroblock { MV pred_mv[MAX_REF_FRAMES]; void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride); + void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); }; #ifdef __cplusplus diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 6cbc38d79..c1db8263e 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -478,8 +478,8 @@ static void choose_partitioning(VP9_COMP *cpi, unsigned int sse = 0; int sum = 0; if (x_idx < pixels_wide && y_idx < pixels_high) - vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, - d + y_idx * dp + x_idx, dp, &sse, &sum); + vp9_get8x8var(s + y_idx * sp + x_idx, sp, + d + y_idx * dp + x_idx, dp, &sse, &sum); fill_variance(sse, sum, 64, &vst->split[k].part_variances.none); } } @@ -1214,9 +1214,9 @@ static void set_source_var_based_partition(VP9_COMP *cpi, int b_offset = b_mi_row * MI_SIZE * src_stride + b_mi_col * MI_SIZE; - vp9_get_sse_sum_16x16(src + b_offset, src_stride, - pre_src + b_offset, pre_stride, - &d16[j].sse, &d16[j].sum); + vp9_get16x16var(src + b_offset, src_stride, + pre_src + b_offset, pre_stride, + &d16[j].sse, &d16[j].sum); d16[j].var = d16[j].sse - (((uint32_t)d16[j].sum * d16[j].sum) >> 8); @@ -2369,22 +2369,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { sizeof(*xd->above_seg_context) * aligned_mi_cols); } -static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { - if (lossless) { - // printf("Switching to lossless\n"); - cpi->mb.fwd_txm4x4 = vp9_fwht4x4; - cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; - cpi->mb.optimize = 0; - cpi->common.lf.filter_level = 0; - cpi->zbin_mode_boost_enabled = 0; - cpi->common.tx_mode = ONLY_4X4; - } else { - // printf("Not lossless\n"); - cpi->mb.fwd_txm4x4 = vp9_fdct4x4; - cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; - } -} - static int check_dual_ref_flags(VP9_COMP *cpi) { const int ref_flags = cpi->ref_frame_flags; @@ -2421,7 +2405,7 @@ static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) { } static TX_MODE select_tx_mode(const VP9_COMP *cpi) { - if (cpi->oxcf.lossless) { + if (cpi->mb.e_mbd.lossless) { return ONLY_4X4; } else if (cpi->common.current_video_frame == 0) { return TX_MODE_SELECT; @@ -3011,13 +2995,21 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_zero(rd_opt->tx_select_diff); vp9_zero(rd_opt->tx_select_threshes); - cm->tx_mode = select_tx_mode(cpi); - cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); + + cm->tx_mode = select_tx_mode(cpi); + + cpi->mb.fwd_txm4x4 = cpi->mb.e_mbd.lossless ? vp9_fwht4x4 : vp9_fdct4x4; + cpi->mb.itxm_add = cpi->mb.e_mbd.lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; + + if (cpi->mb.e_mbd.lossless) { + cpi->mb.optimize = 0; + cpi->common.lf.filter_level = 0; + cpi->zbin_mode_boost_enabled = 0; + } vp9_frame_init_quantizer(cpi); @@ -3357,7 +3349,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8)); } else { mbmi->skip = 1; - if (output_enabled) + if (output_enabled && + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) cm->counts.skip[vp9_get_skip_context(xd)][1]++; reset_skip_context(xd, MAX(bsize, BLOCK_8X8)); } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 3b231b7f2..8581e6117 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -406,7 +406,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. - xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; default: assert(0 && "Invalid transform size"); @@ -428,7 +428,7 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, vp9_xform_quant(x, plane, block, plane_bsize, tx_size); if (p->eobs[block] > 0) - xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); } void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { @@ -574,7 +574,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. - xd->itxm_add(dqcoeff, dst, dst_stride, *eob); + x->itxm_add(dqcoeff, dst, dst_stride, *eob); else vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type); } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 2ce5483d6..0ebc93638 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -393,11 +393,6 @@ static void set_speed_features(VP9_COMP *cpi) { // Set rd thresholds based on mode and speed setting vp9_set_rd_speed_thresholds(cpi); vp9_set_rd_speed_thresholds_sub8x8(cpi); - - cpi->mb.fwd_txm4x4 = vp9_fdct4x4; - if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { - cpi->mb.fwd_txm4x4 = vp9_fwht4x4; - } } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { @@ -596,16 +591,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { if (cpi->oxcf.mode == REALTIME) cpi->oxcf.play_alternate = 0; - cpi->oxcf.lossless = oxcf->lossless; - if (cpi->oxcf.lossless) { - // In lossless mode, make sure right quantizer range and correct transform - // is set. - cpi->oxcf.worst_allowed_q = 0; - cpi->oxcf.best_allowed_q = 0; - cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; - } else { - cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; - } rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; @@ -627,33 +612,30 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { // local file playback mode == really big buffer if (cpi->oxcf.rc_mode == RC_MODE_VBR) { - cpi->oxcf.starting_buffer_level = 60000; - cpi->oxcf.optimal_buffer_level = 60000; - cpi->oxcf.maximum_buffer_size = 240000; + cpi->oxcf.starting_buffer_level_ms = 60000; + cpi->oxcf.optimal_buffer_level_ms = 60000; + cpi->oxcf.maximum_buffer_size_ms = 240000; } - cpi->oxcf.starting_buffer_level = - vp9_rescale(cpi->oxcf.starting_buffer_level, - cpi->oxcf.target_bandwidth, 1000); + rc->starting_buffer_level = vp9_rescale(cpi->oxcf.starting_buffer_level_ms, + cpi->oxcf.target_bandwidth, 1000); // Set or reset optimal and maximum buffer levels. - if (cpi->oxcf.optimal_buffer_level == 0) - cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; + if (cpi->oxcf.optimal_buffer_level_ms == 0) + rc->optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.optimal_buffer_level = - vp9_rescale(cpi->oxcf.optimal_buffer_level, - cpi->oxcf.target_bandwidth, 1000); + rc->optimal_buffer_level = vp9_rescale(cpi->oxcf.optimal_buffer_level_ms, + cpi->oxcf.target_bandwidth, 1000); - if (cpi->oxcf.maximum_buffer_size == 0) - cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; + if (cpi->oxcf.maximum_buffer_size_ms == 0) + rc->maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.maximum_buffer_size = - vp9_rescale(cpi->oxcf.maximum_buffer_size, - cpi->oxcf.target_bandwidth, 1000); + rc->maximum_buffer_size = vp9_rescale(cpi->oxcf.maximum_buffer_size_ms, + cpi->oxcf.target_bandwidth, 1000); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. - rc->bits_off_target = MIN(rc->bits_off_target, cpi->oxcf.maximum_buffer_size); - rc->buffer_level = MIN(rc->buffer_level, cpi->oxcf.maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); + rc->buffer_level = MIN(rc->buffer_level, rc->maximum_buffer_size); // Set up frame rate and related parameters rate control values. vp9_new_framerate(cpi, cpi->oxcf.framerate); @@ -1439,21 +1421,6 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, vp8_yv12_extend_frame_borders_c(dst); } -static int find_fp_qindex() { - int i; - - for (i = 0; i < QINDEX_RANGE; i++) { - if (vp9_convert_qindex_to_q(i) >= 30.0) { - break; - } - } - - if (i == QINDEX_RANGE) - i--; - - return i; -} - #define WRITE_RECON_BUFFER 0 #if WRITE_RECON_BUFFER void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { @@ -2308,17 +2275,6 @@ static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, encode_frame_to_data_rate(cpi, size, dest, frame_flags); } -static void Pass1Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, - unsigned int *frame_flags) { - (void) size; - (void) dest; - (void) frame_flags; - - vp9_rc_get_first_pass_params(cpi); - vp9_set_quantizer(&cpi->common, find_fp_qindex()); - vp9_first_pass(cpi); -} - static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; @@ -2658,7 +2614,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->pass == 1 && (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { - Pass1Encode(cpi, size, dest, frame_flags); + const int lossless = is_lossless_requested(&cpi->oxcf); + cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4; + cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; + vp9_first_pass(cpi); } else if (cpi->pass == 2 && (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass2Encode(cpi, size, dest, frame_flags); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 47c901975..c69a345d0 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -216,9 +216,9 @@ typedef struct VP9EncoderConfig { int over_shoot_pct; // buffering parameters - int64_t starting_buffer_level; // in seconds - int64_t optimal_buffer_level; - int64_t maximum_buffer_size; + int64_t starting_buffer_level_ms; + int64_t optimal_buffer_level_ms; + int64_t maximum_buffer_size_ms; // Frame drop threshold. int drop_frames_water_mark; @@ -228,7 +228,6 @@ typedef struct VP9EncoderConfig { int worst_allowed_q; int best_allowed_q; int cq_level; - int lossless; AQ_MODE aq_mode; // Adaptive Quantization mode // Internal frame size scaling. @@ -257,7 +256,6 @@ typedef struct VP9EncoderConfig { // these parameters aren't to be used in final build don't use!!! int play_alternate; - int alt_freq; int encode_breakout; // early breakout : for video conf recommend 800 @@ -286,6 +284,10 @@ typedef struct VP9EncoderConfig { vp8e_tuning tuning; } VP9EncoderConfig; +static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { + return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; +} + static INLINE int is_best_mode(MODE mode) { return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST; } diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 0d4f2c72c..dc3832b16 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -398,6 +398,32 @@ static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) { } } +static int find_fp_qindex() { + int i; + + for (i = 0; i < QINDEX_RANGE; ++i) + if (vp9_convert_qindex_to_q(i) >= 30.0) + break; + + if (i == QINDEX_RANGE) + i--; + + return i; +} + +static void set_first_pass_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if (!cpi->refresh_alt_ref_frame && + (cm->current_video_frame == 0 || + (cpi->frame_flags & FRAMEFLAGS_KEY))) { + cm->frame_type = KEY_FRAME; + } else { + cm->frame_type = INTER_FRAME; + } + // Do not use periodic key frames. + cpi->rc.frames_to_key = INT_MAX; +} + void vp9_first_pass(VP9_COMP *cpi) { int mb_row, mb_col; MACROBLOCK *const x = &cpi->mb; @@ -438,6 +464,9 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_clear_system_state(); + set_first_pass_params(cpi); + vp9_set_quantizer(cm, find_fp_qindex()); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { MV_REFERENCE_FRAME ref_frame = LAST_FRAME; const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; @@ -1576,7 +1605,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break out conditions. if ( - // Break at cpi->max_gf_interval unless almost totally static. + // Break at active_max_gf_interval unless almost totally static. (i >= active_max_gf_interval && (zero_motion_accumulator < 0.995)) || ( // Don't break out with a very short interval. @@ -2051,19 +2080,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->modified_error_left -= kf_group_err; } -void vp9_rc_get_first_pass_params(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (!cpi->refresh_alt_ref_frame && - (cm->current_video_frame == 0 || - (cpi->frame_flags & FRAMEFLAGS_KEY))) { - cm->frame_type = KEY_FRAME; - } else { - cm->frame_type = INTER_FRAME; - } - // Do not use periodic key frames. - cpi->rc.frames_to_key = INT_MAX; -} - // For VBR...adjustment to the frame target based on error from previous frames void vbr_rate_correction(int * this_frame_target, const int64_t vbr_bits_off_target) { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 9d2b2a497..dbd19a2d6 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -524,7 +524,8 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride); + get_buf_from_mv(in_what, ref_mv), in_what->stride) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 11633a73d..913b8ead4 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -280,8 +280,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int rate_mv = 0; - if (cpi->sf.disable_inter_mode_mask[bsize] & - (1 << INTER_OFFSET(this_mode))) + if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; if (rd_less_than_thresh(best_rd, rd_threshes[mode_idx[this_mode]], diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 6f646ea0e..0163fd1e8 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -159,7 +159,7 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { lrc->bits_off_target += bits_off_for_this_layer; // Clip buffer level to maximum buffer size for the layer. - lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); + lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); lrc->buffer_level = lrc->bits_off_target; } } @@ -167,7 +167,6 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { // Update the buffer level: leaky bucket model. static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { const VP9_COMMON *const cm = &cpi->common; - const VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; // Non-viewable frames are a special case and are treated as pure overhead. @@ -178,7 +177,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { } // Clip the buffer level to the maximum specified buffer size. - rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); rc->buffer_level = rc->bits_off_target; if (cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR) { @@ -188,23 +187,20 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { if (pass == 0 && oxcf->rc_mode == RC_MODE_CBR) { - rc->avg_frame_qindex[0] = oxcf->worst_allowed_q; - rc->avg_frame_qindex[1] = oxcf->worst_allowed_q; - rc->avg_frame_qindex[2] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; + rc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; } else { - rc->avg_frame_qindex[0] = (oxcf->worst_allowed_q + - oxcf->best_allowed_q) / 2; - rc->avg_frame_qindex[1] = (oxcf->worst_allowed_q + - oxcf->best_allowed_q) / 2; - rc->avg_frame_qindex[2] = (oxcf->worst_allowed_q + - oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + rc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; } rc->last_q[KEY_FRAME] = oxcf->best_allowed_q; rc->last_q[INTER_FRAME] = oxcf->best_allowed_q; - rc->buffer_level = oxcf->starting_buffer_level; - rc->bits_off_target = oxcf->starting_buffer_level; + rc->buffer_level = rc->starting_buffer_level; + rc->bits_off_target = rc->starting_buffer_level; rc->rolling_target_bits = rc->avg_frame_bandwidth; rc->rolling_actual_bits = rc->avg_frame_bandwidth; @@ -250,7 +246,7 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) { // If buffer is below drop_mark, for now just drop every other frame // (starting with the next frame) until it increases back over drop_mark. int drop_mark = (int)(oxcf->drop_frames_water_mark * - oxcf->optimal_buffer_level / 100); + rc->optimal_buffer_level / 100); if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) { --rc->decimation_factor; @@ -444,10 +440,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // ambient Q (at buffer = optimal level) to worst_quality level // (at buffer = critical level). const VP9_COMMON *const cm = &cpi->common; - const VP9EncoderConfig *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; // Buffer level below which we push active_worst to worst_quality. - int64_t critical_level = oxcf->optimal_buffer_level >> 2; + int64_t critical_level = rc->optimal_buffer_level >> 2; int64_t buff_lvl_step = 0; int adjustment = 0; int active_worst_quality; @@ -459,26 +454,26 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { else active_worst_quality = MIN(rc->worst_quality, rc->avg_frame_qindex[KEY_FRAME] * 3 / 2); - if (rc->buffer_level > oxcf->optimal_buffer_level) { + if (rc->buffer_level > rc->optimal_buffer_level) { // Adjust down. // Maximum limit for down adjustment, ~30%. int max_adjustment_down = active_worst_quality / 3; if (max_adjustment_down) { - buff_lvl_step = ((oxcf->maximum_buffer_size - - oxcf->optimal_buffer_level) / max_adjustment_down); + buff_lvl_step = ((rc->maximum_buffer_size - + rc->optimal_buffer_level) / max_adjustment_down); if (buff_lvl_step) - adjustment = (int)((rc->buffer_level - oxcf->optimal_buffer_level) / + adjustment = (int)((rc->buffer_level - rc->optimal_buffer_level) / buff_lvl_step); active_worst_quality -= adjustment; } } else if (rc->buffer_level > critical_level) { // Adjust up from ambient Q. if (critical_level) { - buff_lvl_step = (oxcf->optimal_buffer_level - critical_level); + buff_lvl_step = (rc->optimal_buffer_level - critical_level); if (buff_lvl_step) { adjustment = (int)((rc->worst_quality - rc->avg_frame_qindex[INTER_FRAME]) * - (oxcf->optimal_buffer_level - rc->buffer_level) / + (rc->optimal_buffer_level - rc->buffer_level) / buff_lvl_step); } active_worst_quality = rc->avg_frame_qindex[INTER_FRAME] + adjustment; @@ -1086,21 +1081,21 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->last_q[KEY_FRAME] = qindex; rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2); - } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && - !(cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) { - rc->avg_frame_qindex[2] = - ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2); } else { - rc->last_q[INTER_FRAME] = qindex; - rc->avg_frame_qindex[INTER_FRAME] = + if (rc->is_src_frame_alt_ref || + !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) || + (cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) { + rc->last_q[INTER_FRAME] = qindex; + rc->avg_frame_qindex[INTER_FRAME] = ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2); - rc->ni_frames++; - rc->tot_q += vp9_convert_qindex_to_q(qindex); - rc->avg_q = rc->tot_q / rc->ni_frames; - // Calculate the average Q for normal inter frames (not key or GFU frames). - rc->ni_tot_qi += qindex; - rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames; + rc->ni_frames++; + rc->tot_q += vp9_convert_qindex_to_q(qindex); + rc->avg_q = rc->tot_q / rc->ni_frames; + // Calculate the average Q for normal inter frames (not key or GFU + // frames). + rc->ni_tot_qi += qindex; + rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames; + } } // Keep record of last boosted (KF/KF/ARF) Q value. @@ -1227,8 +1222,8 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9EncoderConfig *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; const SVC *const svc = &cpi->svc; - const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; - const int64_t one_pct_bits = 1 + oxcf->optimal_buffer_level / 100; + const int64_t diff = rc->optimal_buffer_level - rc->buffer_level; + const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100; int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target = rc->avg_frame_bandwidth; if (svc->number_temporal_layers > 1 && @@ -1259,8 +1254,8 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const SVC *const svc = &cpi->svc; int target; if (cpi->common.current_video_frame == 0) { - target = ((cpi->oxcf.starting_buffer_level / 2) > INT_MAX) - ? INT_MAX : (int)(cpi->oxcf.starting_buffer_level / 2); + target = ((rc->starting_buffer_level / 2) > INT_MAX) + ? INT_MAX : (int)(rc->starting_buffer_level / 2); } else { int kf_boost = 32; double framerate = oxcf->framerate; @@ -1388,6 +1383,24 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, return target_index - qindex; } +void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf, + RATE_CONTROL *const rc) { + // Set Maximum gf/arf interval + rc->max_gf_interval = 16; + + // Extended interval for genuinely static scenes + rc->static_scene_max_gf_interval = oxcf->key_freq >> 1; + + // Special conditions when alt ref frame enabled + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + + if (rc->max_gf_interval > rc->static_scene_max_gf_interval) + rc->max_gf_interval = rc->static_scene_max_gf_interval; +} + void vp9_rc_update_framerate(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -1412,21 +1425,5 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) { rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits); - // Set Maximum gf/arf interval - rc->max_gf_interval = 16; - - // Extended interval for genuinely static scenes - rc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1; - - // Special conditions when alt ref frame enabled in lagged compress mode - if (oxcf->play_alternate && oxcf->lag_in_frames) { - if (rc->max_gf_interval > oxcf->lag_in_frames - 1) - rc->max_gf_interval = oxcf->lag_in_frames - 1; - - if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; - } - - if (rc->max_gf_interval > rc->static_scene_max_gf_interval) - rc->max_gf_interval = rc->static_scene_max_gf_interval; + vp9_rc_set_gf_max_interval(oxcf, rc); } diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 614078eef..f1a4a3f6d 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -61,7 +61,7 @@ typedef struct { int ni_av_qi; int ni_tot_qi; int ni_frames; - int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF + int avg_frame_qindex[FRAME_TYPES]; double tot_q; double avg_q; @@ -84,6 +84,10 @@ typedef struct { int worst_quality; int best_quality; + + int64_t starting_buffer_level; + int64_t optimal_buffer_level; + int64_t maximum_buffer_size; // int active_best_quality; } RATE_CONTROL; @@ -178,6 +182,9 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, void vp9_rc_update_framerate(struct VP9_COMP *cpi); +void vp9_rc_set_gf_max_interval(const struct VP9EncoderConfig *const oxcf, + RATE_CONTROL *const rc); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index d402d7b40..f68aa2738 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1675,9 +1675,9 @@ static INLINE int mv_has_subpel(const MV *mv) { static int check_best_zero_mv( const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], - int disable_inter_mode_mask, int this_mode, + int inter_mode_mask, int this_mode, const MV_REFERENCE_FRAME ref_frames[2]) { - if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + if ((inter_mode_mask & (1 << ZEROMV)) && (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && frame_mv[this_mode][ref_frames[0]].as_int == 0 && (ref_frames[1] == NONE || @@ -1743,7 +1743,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; const int has_second_rf = has_second_ref(mbmi); - const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; + const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; vp9_zero(*bsi); @@ -1792,11 +1792,11 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, mode_idx = INTER_OFFSET(this_mode); bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; - if (disable_inter_mode_mask & (1 << mode_idx)) + if (!(inter_mode_mask & (1 << this_mode))) continue; if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - disable_inter_mode_mask, + inter_mode_mask, this_mode, mbmi->ref_frame)) continue; @@ -3063,7 +3063,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; const int intra_y_mode_mask = cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]; - int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; + int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; vp9_zero(best_mbmode); x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -3130,7 +3130,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const int inter_non_zero_mode_mask = 0x1F7F7; mode_skip_mask |= inter_non_zero_mode_mask; mode_skip_mask &= ~(1 << THR_ZEROMV); - disable_inter_mode_mask = ~(1 << INTER_OFFSET(ZEROMV)); + inter_mode_mask = (1 << ZEROMV); } // Disable this drop out case if the ref frame @@ -3182,7 +3182,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode_index = THR_ZEROMV; mode_skip_mask = ~(1 << mode_index); mode_skip_start = MAX_MODES; - disable_inter_mode_mask = 0; + inter_mode_mask = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | + (1 << NEWMV); } for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { @@ -3229,8 +3230,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame[0]; - if (ref_frame != INTRA_FRAME && - disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode))) + if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode))) continue; second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; @@ -3279,7 +3279,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - disable_inter_mode_mask, this_mode, ref_frames)) + inter_mode_mask, this_mode, ref_frames)) continue; } } @@ -3665,7 +3665,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int_mv seg_mvs[4][MAX_REF_FRAMES]; b_mode_info best_bmodes[4]; int best_skip2 = 0; - int ref_frame_mask = 0; int mode_skip_mask = 0; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -3700,17 +3699,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][ref_frame].as_int = 0; } - for (ref_frame = LAST_FRAME; - ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) { - int i; - for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { - if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) { - ref_frame_mask |= (1 << ref_frame); - break; - } - } - } - for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3805,11 +3793,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { continue; - // If the segment skip feature is enabled.... - // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) && - ref_frame != INTRA_FRAME) { - continue; // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. @@ -4034,15 +4017,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - // Is Mb level skip allowed (i.e. not coded at segment level). - const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id, - SEG_LVL_SKIP); + // Skip is never coded at the segment level for sub8x8 blocks and instead + // always coded in the bitstream at the mode info level. - if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) { + if (ref_frame != INTRA_FRAME && !xd->lossless) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { // Add in the cost of the no skip flag. @@ -4057,7 +4035,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate_uv = 0; this_skip2 = 1; } - } else if (mb_skip_allowed) { + } else { // Add in the cost of the no skip flag. rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 5ea09a8a7..e85d08a6d 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -99,41 +99,44 @@ static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, int step_param, int error_per_bit, const MV *ref_mv, MV *tmp_mv, int var_max, int rd) { + const SPEED_FEATURES *const sf = &cpi->sf; + const SEARCH_METHODS method = sf->search_method; + vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; int var = 0; - if (cpi->sf.search_method == FAST_DIAMOND) { - var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == FAST_HEX) { - var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == HEX) { - var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == SQUARE) { - var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == BIGDIA) { - var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, - &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); - if (rd && var < var_max) - var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); - } else { - int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - - var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, - further_steps, 1, &cpi->fn_ptr[bsize], - ref_mv, tmp_mv); + switch (method) { + case FAST_DIAMOND: + var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case FAST_HEX: + var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case HEX: + var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case SQUARE: + var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case BIGDIA: + var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, + fn_ptr, 1, ref_mv, tmp_mv); + break; + case NSTEP: + var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, + (sf->max_step_search_steps - 1) - step_param, + 1, fn_ptr, ref_mv, tmp_mv); + break; + default: + assert(!"Invalid search method."); } + if (method != NSTEP && rd && var < var_max) + var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); + return var; } diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 7c3abd5d7..b7f839747 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -14,20 +14,23 @@ #include "vp9/encoder/vp9_speed_features.h" enum { - ALL_INTRA_MODES = (1 << DC_PRED) | + INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) | (1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) | (1 << D207_PRED) | (1 << D63_PRED) | (1 << TM_PRED), - - INTRA_DC_ONLY = (1 << DC_PRED), - - INTRA_DC_TM = (1 << TM_PRED) | (1 << DC_PRED), - + INTRA_DC = (1 << DC_PRED), + INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED), INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED), + INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | + (1 << H_PRED) +}; - INTRA_DC_TM_H_V = INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED) +enum { + INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV), + INTER_NEAREST = (1 << NEARESTMV), + INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) }; enum { @@ -140,8 +143,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->search_method = HEX; sf->disable_filter_search_var_thresh = 500; for (i = 0; i < TX_SIZES; ++i) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_y_mode_mask[i] = INTRA_DC; + sf->intra_uv_mode_mask[i] = INTRA_DC; } cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; } @@ -156,7 +159,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->adaptive_rd_thresh = 1; sf->use_fast_coef_costing = 1; - if (speed == 1) { + if (speed >= 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); sf->less_rectangular_check = 1; sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD @@ -179,13 +182,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, } if (speed >= 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD - : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; else sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; @@ -193,28 +192,18 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR; - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; sf->adaptive_pred_interp_filter = 2; - sf->auto_mv_step_size = 1; sf->reference_masking = 1; - sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; sf->use_lp32x32fdct = 1; sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; } if (speed >= 3) { @@ -246,15 +235,15 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->subpel_force_stop = 1; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_uv_mode_mask[i] = INTRA_DC; } - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->frame_parameter_update = 0; sf->search_method = FAST_HEX; - sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); - sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW; + sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; + sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; + sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST; sf->max_intra_bsize = BLOCK_32X32; sf->allow_skip_recode = 1; } @@ -285,7 +274,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, if (speed >= 7) { int i; for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->inter_mode_mask[i] = INTER_NEAREST; } } @@ -302,7 +291,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->subpel_search_method = SUBPEL_TREE; sf->subpel_iters_per_step = 2; sf->subpel_force_stop = 0; - sf->optimize_coefficients = !oxcf->lossless; + sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf); sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; @@ -330,8 +319,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_split_var_thresh = 0; sf->disable_filter_search_var_thresh = 0; for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; - sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; + sf->intra_y_mode_mask[i] = INTRA_ALL; + sf->intra_uv_mode_mask[i] = INTRA_ALL; } sf->use_rd_breakout = 0; sf->skip_encode_sb = 0; @@ -343,7 +332,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set sf->use_nonrd_pick_mode = 0; for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 0; + sf->inter_mode_mask[i] = INTER_ALL; sf->max_intra_bsize = BLOCK_64X64; // This setting only takes effect when partition_search_type is set // to FIXED_PARTITION. diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index a54599e6a..3e7cd27d8 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -331,8 +331,8 @@ typedef struct SPEED_FEATURES { int use_nonrd_pick_mode; // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV - // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. - int disable_inter_mode_mask[BLOCK_SIZES]; + // modes are used in order from LSB to MSB for each BLOCK_SIZE. + int inter_mode_mask[BLOCK_SIZES]; // This feature controls whether we do the expensive context update and // calculation in the rd coefficient costing loop. diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 95ea1072d..1b995757a 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -54,7 +54,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; } - lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level), + lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level_ms), lc->target_bandwidth, 1000); lrc->bits_off_target = lrc->buffer_level; } @@ -87,14 +87,14 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, } bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; // Update buffer-related quantities. - lc->starting_buffer_level = - (int64_t)(oxcf->starting_buffer_level * bitrate_alloc); - lc->optimal_buffer_level = - (int64_t)(oxcf->optimal_buffer_level * bitrate_alloc); - lc->maximum_buffer_size = - (int64_t)(oxcf->maximum_buffer_size * bitrate_alloc); - lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); + lrc->starting_buffer_level = + (int64_t)(rc->starting_buffer_level * bitrate_alloc); + lrc->optimal_buffer_level = + (int64_t)(rc->optimal_buffer_level * bitrate_alloc); + lrc->maximum_buffer_size = + (int64_t)(rc->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); // Update framerate-related quantities. if (svc->number_temporal_layers > 1) { lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer]; @@ -149,20 +149,7 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { oxcf->two_pass_vbrmin_section / 100); lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / 100); - lrc->max_gf_interval = 16; - - lrc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1; - - if (oxcf->play_alternate && oxcf->lag_in_frames) { - if (lrc->max_gf_interval > oxcf->lag_in_frames - 1) - lrc->max_gf_interval = oxcf->lag_in_frames - 1; - - if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; - } - - if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval) - lrc->max_gf_interval = lrc->static_scene_max_gf_interval; + vp9_rc_set_gf_max_interval(oxcf, lrc); } void vp9_restore_layer_context(VP9_COMP *const cpi) { @@ -173,9 +160,6 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->rc = lc->rc; cpi->twopass = lc->twopass; cpi->oxcf.target_bandwidth = lc->target_bandwidth; - cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; - cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; - cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). if (cpi->svc.number_temporal_layers > 1) { @@ -191,9 +175,6 @@ void vp9_save_layer_context(VP9_COMP *const cpi) { lc->rc = cpi->rc; lc->twopass = cpi->twopass; lc->target_bandwidth = (int)oxcf->target_bandwidth; - lc->starting_buffer_level = oxcf->starting_buffer_level; - lc->optimal_buffer_level = oxcf->optimal_buffer_level; - lc->maximum_buffer_size = oxcf->maximum_buffer_size; } void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 6881ce1e7..36e2027fd 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -22,9 +22,6 @@ extern "C" { typedef struct { RATE_CONTROL rc; int target_bandwidth; - int64_t starting_buffer_level; - int64_t optimal_buffer_level; - int64_t maximum_buffer_size; double framerate; int avg_frame_size; TWO_PASS twopass; diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 02bed8988..eb5ae2e41 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -156,16 +156,15 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ } - -void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { +void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); } -void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { +void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); } diff --git a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm index 8723a7114..28458dcdd 100644 --- a/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm +++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm @@ -23,6 +23,7 @@ pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1 %endmacro +TRANSFORM_COEFFS 11585, 11585 TRANSFORM_COEFFS 15137, 6270 TRANSFORM_COEFFS 16069, 3196 TRANSFORM_COEFFS 9102, 13623 @@ -83,7 +84,7 @@ SECTION .text %endmacro ; 1D forward 8x8 DCT transform -%macro FDCT8_1D 0 +%macro FDCT8_1D 1 SUM_SUB 0, 7, 9 SUM_SUB 1, 6, 9 SUM_SUB 2, 5, 9 @@ -92,14 +93,21 @@ SECTION .text SUM_SUB 0, 3, 9 SUM_SUB 1, 2, 9 SUM_SUB 6, 5, 9 +%if %1 == 0 SUM_SUB 0, 1, 9 +%endif BUTTERFLY_4X 2, 3, 6270, 15137, m8, 9, 10 pmulhrsw m6, m12 pmulhrsw m5, m12 +%if %1 == 0 pmulhrsw m0, m12 pmulhrsw m1, m12 +%else + BUTTERFLY_4X 1, 0, 11585, 11585, m8, 9, 10 + SWAP 0, 1 +%endif SUM_SUB 4, 5, 9 SUM_SUB 7, 6, 9 @@ -150,10 +158,10 @@ cglobal fdct8x8, 3, 5, 13, input, output, stride psllw m7, 2 ; column transform - FDCT8_1D + FDCT8_1D 0 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 - FDCT8_1D + FDCT8_1D 1 TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9 DIVIDE_ROUND_2X 0, 1, 9, 10 diff --git a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm index 673e0b3a6..21aaa9383 100644 --- a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm +++ b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm @@ -43,9 +43,9 @@ sym(vp9_temporal_filter_apply_sse2): mov [rsp + rbp_backup], rbp ; end prolog - mov rdx, arg(3) + mov edx, arg(3) mov [rsp + block_width], rdx - mov rdx, arg(4) + mov edx, arg(4) mov [rsp + block_height], rdx movd xmm6, arg(5) movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read |