diff options
Diffstat (limited to 'vp9/encoder')
28 files changed, 511 insertions, 684 deletions
diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c index 47ad8d8cc..0d6b41d15 100644 --- a/vp9/encoder/vp9_aq_complexity.c +++ b/vp9/encoder/vp9_aq_complexity.c @@ -47,11 +47,21 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { // Use some of the segments for in frame Q adjustment. for (segment = 1; segment < 2; segment++) { - const int qindex_delta = + int qindex_delta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, in_frame_q_adj_ratio[segment]); - vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + + // For AQ mode 2, we dont allow Q0 in a segment if the base Q is not 0. + // Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment Q delta + // is sometimes applied without going back around the rd loop. + // This could lead to an illegal combination of partition size and q. + if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { + qindex_delta = -cm->base_qindex + 1; + } + if ((cm->base_qindex + qindex_delta) > 0) { + vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); + vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); + } } } } diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 8ef2b2eed..76f5e7bbe 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -20,7 +20,6 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_mvref_common.h" -#include "vp9/common/vp9_pragmas.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_systemdependent.h" diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index c406860a0..c3cd93b78 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -109,6 +109,7 @@ struct macroblock { MV pred_mv[MAX_REF_FRAMES]; void (*fwd_txm4x4)(const int16_t *input, int16_t *output, int stride); + void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); }; #ifdef __cplusplus diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index ef33fcaf1..37fb0f3c6 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -76,18 +76,6 @@ typedef struct { unsigned int var; } diff; -static void get_sse_sum_8x8(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - variance(src, src_stride, ref, ref_stride, 8, 8, sse, sum); -} - -static void get_sse_sum_16x16(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse, int *sum) { - variance(src, src_stride, ref, ref_stride, 16, 16, sse, sum); -} - static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs) { @@ -490,8 +478,8 @@ static void choose_partitioning(VP9_COMP *cpi, unsigned int sse = 0; int sum = 0; if (x_idx < pixels_wide && y_idx < pixels_high) - get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, - d + y_idx * dp + x_idx, dp, &sse, &sum); + vp9_get8x8var(s + y_idx * sp + x_idx, sp, + d + y_idx * dp + x_idx, dp, &sse, &sum); fill_variance(sse, sum, 64, &vst->split[k].part_variances.none); } } @@ -1226,9 +1214,9 @@ static void set_source_var_based_partition(VP9_COMP *cpi, int b_offset = b_mi_row * MI_SIZE * src_stride + b_mi_col * MI_SIZE; - get_sse_sum_16x16(src + b_offset, src_stride, - pre_src + b_offset, pre_stride, - &d16[j].sse, &d16[j].sum); + vp9_get16x16var(src + b_offset, src_stride, + pre_src + b_offset, pre_stride, + &d16[j].sse, &d16[j].sum); d16[j].var = d16[j].sse - (((uint32_t)d16[j].sum * d16[j].sum) >> 8); @@ -1303,14 +1291,14 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile, if (row8x8_remaining >= MI_BLOCK_SIZE && col8x8_remaining >= MI_BLOCK_SIZE) { this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride, - pre, pre_stride, 0x7fffffff); + pre, pre_stride); threshold = (1 << 12); } else { int r, c; for (r = 0; r < row8x8_remaining; r += 2) for (c = 0; c < col8x8_remaining; c += 2) - this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride, pre, - pre_stride, 0x7fffffff); + this_sad += cpi->fn_ptr[BLOCK_16X16].sdf(src, src_stride, + pre, pre_stride); threshold = (row8x8_remaining * col8x8_remaining) << 6; } @@ -1318,7 +1306,8 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile, return x->in_static_area; } -static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { +static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8, + const int motion_thresh) { const int mis = cm->mi_stride; int block_row, block_col; @@ -1327,8 +1316,8 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { for (block_col = 0; block_col < 8; ++block_col) { const MODE_INFO *prev_mi = prev_mi_8x8[block_row * mis + block_col]; if (prev_mi) { - if (abs(prev_mi->mbmi.mv[0].as_mv.row) >= 8 || - abs(prev_mi->mbmi.mv[0].as_mv.col) >= 8) + if (abs(prev_mi->mbmi.mv[0].as_mv.row) > motion_thresh || + abs(prev_mi->mbmi.mv[0].as_mv.col) > motion_thresh) return 1; } } @@ -2324,7 +2313,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, || cpi->rc.is_src_frame_alt_ref || ((sf->use_lastframe_partitioning == LAST_FRAME_PARTITION_LOW_MOTION) && - sb_has_motion(cm, prev_mi))) { + sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) { // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); @@ -2337,7 +2326,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, cpi->pc_root); } else { if (sf->constrain_copy_partition && - sb_has_motion(cm, prev_mi)) + sb_has_motion(cm, prev_mi, sf->lf_motion_threshold)) constrain_copy_partitioning(cpi, tile, mi, prev_mi, mi_row, mi_col, BLOCK_16X16); else @@ -2384,7 +2373,7 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { if (lossless) { // printf("Switching to lossless\n"); cpi->mb.fwd_txm4x4 = vp9_fwht4x4; - cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; + cpi->mb.itxm_add = vp9_iwht4x4_add; cpi->mb.optimize = 0; cpi->common.lf.filter_level = 0; cpi->zbin_mode_boost_enabled = 0; @@ -2392,7 +2381,7 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { } else { // printf("Not lossless\n"); cpi->mb.fwd_txm4x4 = vp9_fdct4x4; - cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; + cpi->mb.itxm_add = vp9_idct4x4_add; } } @@ -2869,7 +2858,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, if (mi_row + hbs < cm->mi_rows) { nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col, &rate, &dist, subsize); - pc_tree->horizontal[1].mic.mbmi = mi[0]->mbmi; + pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi; if (rate != INT_MAX && dist != INT64_MAX && *totrate != INT_MAX && *totdist != INT64_MAX) { *totrate += rate; @@ -3368,7 +3357,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8)); } else { mbmi->skip = 1; - if (output_enabled) + if (output_enabled && + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) cm->counts.skip[vp9_get_skip_context(xd)][1]++; reset_skip_context(xd, MAX(bsize, BLOCK_8X8)); } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 3b231b7f2..8581e6117 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -406,7 +406,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. - xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; default: assert(0 && "Invalid transform size"); @@ -428,7 +428,7 @@ static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, vp9_xform_quant(x, plane, block, plane_bsize, tx_size); if (p->eobs[block] > 0) - xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); + x->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); } void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { @@ -574,7 +574,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. - xd->itxm_add(dqcoeff, dst, dst_stride, *eob); + x->itxm_add(dqcoeff, dst, dst_stride, *eob); else vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type); } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 645503103..c955d27c0 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -602,9 +602,9 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { // is set. cpi->oxcf.worst_allowed_q = 0; cpi->oxcf.best_allowed_q = 0; - cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add; + cpi->mb.itxm_add = vp9_iwht4x4_add; } else { - cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add; + cpi->mb.itxm_add = vp9_idct4x4_add; } rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; @@ -632,9 +632,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cpi->oxcf.maximum_buffer_size = 240000; } - // Convert target bandwidth from Kbit/s to Bit/s - cpi->oxcf.target_bandwidth *= 1000; - cpi->oxcf.starting_buffer_level = vp9_rescale(cpi->oxcf.starting_buffer_level, cpi->oxcf.target_bandwidth, 1000); @@ -2103,8 +2100,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_clear_system_state(); - vp9_zero(cpi->rd.tx_select_threshes); - #if CONFIG_VP9_POSTPROC if (cpi->oxcf.noise_sensitivity > 0) { int l = 0; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index fce5b28f3..dc3832b16 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -64,8 +64,7 @@ #define MIN_GF_INTERVAL 4 #endif - -// #define LONG_TERM_VBR_CORRECTION +#define LONG_TERM_VBR_CORRECTION static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { YV12_BUFFER_CONFIG temp = *a; @@ -170,7 +169,6 @@ static void zero_stats(FIRSTPASS_STATS *section) { section->intra_error = 0.0; section->coded_error = 0.0; section->sr_coded_error = 0.0; - section->ssim_weighted_pred_err = 0.0; section->pcnt_inter = 0.0; section->pcnt_motion = 0.0; section->pcnt_second_ref = 0.0; @@ -195,7 +193,6 @@ static void accumulate_stats(FIRSTPASS_STATS *section, section->intra_error += frame->intra_error; section->coded_error += frame->coded_error; section->sr_coded_error += frame->sr_coded_error; - section->ssim_weighted_pred_err += frame->ssim_weighted_pred_err; section->pcnt_inter += frame->pcnt_inter; section->pcnt_motion += frame->pcnt_motion; section->pcnt_second_ref += frame->pcnt_second_ref; @@ -218,7 +215,6 @@ static void subtract_stats(FIRSTPASS_STATS *section, section->intra_error -= frame->intra_error; section->coded_error -= frame->coded_error; section->sr_coded_error -= frame->sr_coded_error; - section->ssim_weighted_pred_err -= frame->ssim_weighted_pred_err; section->pcnt_inter -= frame->pcnt_inter; section->pcnt_motion -= frame->pcnt_motion; section->pcnt_second_ref -= frame->pcnt_second_ref; @@ -242,7 +238,6 @@ static void avg_stats(FIRSTPASS_STATS *section) { section->intra_error /= section->count; section->coded_error /= section->count; section->sr_coded_error /= section->count; - section->ssim_weighted_pred_err /= section->count; section->pcnt_inter /= section->count; section->pcnt_second_ref /= section->count; section->pcnt_neutral /= section->count; @@ -259,86 +254,18 @@ static void avg_stats(FIRSTPASS_STATS *section) { // Calculate a modified Error used in distributing bits between easier and // harder frames. -static double calculate_modified_err(const VP9_COMP *cpi, +static double calculate_modified_err(const TWO_PASS *twopass, + const VP9EncoderConfig *oxcf, const FIRSTPASS_STATS *this_frame) { - const TWO_PASS *twopass = &cpi->twopass; - const SVC *const svc = &cpi->svc; - const FIRSTPASS_STATS *stats; - double av_err; - double modified_error; - - if (svc->number_spatial_layers > 1 && - svc->number_temporal_layers == 1) { - twopass = &svc->layer_context[svc->spatial_layer_id].twopass; - } - - stats = &twopass->total_stats; - av_err = stats->ssim_weighted_pred_err / stats->count; - modified_error = av_err * pow(this_frame->ssim_weighted_pred_err / - DOUBLE_DIVIDE_CHECK(av_err), - cpi->oxcf.two_pass_vbrbias / 100.0); - + const FIRSTPASS_STATS *const stats = &twopass->total_stats; + const double av_err = stats->coded_error / stats->count; + const double modified_error = av_err * + pow(this_frame->coded_error / DOUBLE_DIVIDE_CHECK(av_err), + oxcf->two_pass_vbrbias / 100.0); return fclamp(modified_error, twopass->modified_error_min, twopass->modified_error_max); } -static const double weight_table[256] = { - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, - 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.031250, 0.062500, - 0.093750, 0.125000, 0.156250, 0.187500, 0.218750, 0.250000, 0.281250, - 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750, 0.500000, - 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750, - 0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500, - 0.968750, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, - 1.000000, 1.000000, 1.000000, 1.000000 -}; - -static double simple_weight(const YV12_BUFFER_CONFIG *buf) { - int i, j; - double sum = 0.0; - const int w = buf->y_crop_width; - const int h = buf->y_crop_height; - const uint8_t *row = buf->y_buffer; - - for (i = 0; i < h; ++i) { - const uint8_t *pixel = row; - for (j = 0; j < w; ++j) - sum += weight_table[*pixel++]; - row += buf->y_stride; - } - - return MAX(0.1, sum / (w * h)); -} - // This function returns the maximum target rate per frame. static int frame_max_bits(const RATE_CONTROL *rc, const VP9EncoderConfig *oxcf) { @@ -533,6 +460,7 @@ void vp9_first_pass(VP9_COMP *cpi) { TWO_PASS *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; + FIRSTPASS_STATS fps; vp9_clear_system_state(); @@ -830,14 +758,11 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_clear_system_state(); { - FIRSTPASS_STATS fps; - fps.frame = cm->current_video_frame; fps.spatial_layer_id = cpi->svc.spatial_layer_id; fps.intra_error = (double)(intra_error >> 8); fps.coded_error = (double)(coded_error >> 8); fps.sr_coded_error = (double)(sr_coded_error >> 8); - fps.ssim_weighted_pred_err = fps.coded_error * simple_weight(cpi->Source); fps.count = 1.0; fps.pcnt_inter = (double)intercount / cm->MBs; fps.pcnt_second_ref = (double)second_ref_count / cm->MBs; @@ -871,8 +796,7 @@ void vp9_first_pass(VP9_COMP *cpi) { fps.duration = (double)(cpi->source->ts_end - cpi->source->ts_start); // Don't want to do output stats with a stack variable! - twopass->this_frame_stats = fps; - output_stats(&twopass->this_frame_stats, cpi->output_pkt_list); + output_stats(&fps, cpi->output_pkt_list); accumulate_stats(&twopass->total_stats, &fps); } @@ -880,9 +804,9 @@ void vp9_first_pass(VP9_COMP *cpi) { // the prediction is good enough... but also don't allow it to lag too far. if ((twopass->sr_update_lag > 3) || ((cm->current_video_frame > 0) && - (twopass->this_frame_stats.pcnt_inter > 0.20) && - ((twopass->this_frame_stats.intra_error / - DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) { + (fps.pcnt_inter > 0.20) && + ((fps.intra_error / + DOUBLE_DIVIDE_CHECK(fps.coded_error)) > 2.0))) { if (gld_yv12 != NULL) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); } @@ -1043,25 +967,19 @@ void vp9_init_second_pass(VP9_COMP *cpi) { // Scan the first pass file and calculate a modified total error based upon // the bias/power function used to allocate bits. { - const FIRSTPASS_STATS *const start_pos = twopass->stats_in; - FIRSTPASS_STATS this_frame; - const double av_error = stats->ssim_weighted_pred_err / - DOUBLE_DIVIDE_CHECK(stats->count); - - - twopass->modified_error_total = 0.0; - twopass->modified_error_min = - (av_error * oxcf->two_pass_vbrmin_section) / 100; - twopass->modified_error_max = - (av_error * oxcf->two_pass_vbrmax_section) / 100; - - while (input_stats(twopass, &this_frame) != EOF) { - twopass->modified_error_total += - calculate_modified_err(cpi, &this_frame); + const double avg_error = stats->coded_error / + DOUBLE_DIVIDE_CHECK(stats->count); + const FIRSTPASS_STATS *s = twopass->stats_in; + double modified_error_total = 0.0; + twopass->modified_error_min = (avg_error * + oxcf->two_pass_vbrmin_section) / 100; + twopass->modified_error_max = (avg_error * + oxcf->two_pass_vbrmax_section) / 100; + while (s < twopass->stats_in_end) { + modified_error_total += calculate_modified_err(twopass, oxcf, s); + ++s; } - twopass->modified_error_left = twopass->modified_error_total; - - reset_fpf_position(twopass, start_pos); + twopass->modified_error_left = modified_error_total; } // Reset the vbr bits off target counter @@ -1182,16 +1100,17 @@ static void accumulate_frame_motion_stats( } // Calculate a baseline boost number for the current frame. -static double calc_frame_boost(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame, +static double calc_frame_boost(const TWO_PASS *twopass, + const FIRSTPASS_STATS *this_frame, double this_frame_mv_in_out) { double frame_boost; // Underlying boost factor is based on inter intra error ratio. - if (this_frame->intra_error > cpi->twopass.gf_intra_err_min) + if (this_frame->intra_error > twopass->gf_intra_err_min) frame_boost = (IIFACTOR * this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); else - frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min / + frame_boost = (IIFACTOR * twopass->gf_intra_err_min / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); // Increase boost for frames where new data coming into frame (e.g. zoom out). @@ -1244,8 +1163,8 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, ? MIN_DECAY_FACTOR : decay_accumulator; } - boost_score += (decay_accumulator * - calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out)); + boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame, + this_frame_mv_in_out); } *f_boost = (int)boost_score; @@ -1281,8 +1200,8 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, ? MIN_DECAY_FACTOR : decay_accumulator; } - boost_score += (decay_accumulator * - calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out)); + boost_score += decay_accumulator * calc_frame_boost(twopass, &this_frame, + this_frame_mv_in_out); } *b_boost = (int)boost_score; @@ -1500,6 +1419,66 @@ static int calculate_boost_bits(int frame_count, return MAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), 0); } +static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, + double group_error, int gf_arf_bits) { + RATE_CONTROL *const rc = &cpi->rc; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; + TWO_PASS *twopass = &cpi->twopass; + FIRSTPASS_STATS frame_stats; + int i; + int group_frame_index = 1; + int target_frame_size; + int key_frame; + const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); + int64_t total_group_bits = gf_group_bits; + double modified_err = 0.0; + double err_fraction; + + key_frame = cpi->common.frame_type == KEY_FRAME || + vp9_is_upper_layer_key_frame(cpi); + + // For key frames the frame target rate is already set and it + // is also the golden frame. + // NOTE: We dont bother to check for the special case of ARF overlay + // frames here, as there is clamping code for this in the function + // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass + // encodes. + if (!key_frame) { + twopass->gf_group_bit_allocation[0] = gf_arf_bits; + + // Step over the golden frame / overlay frame + if (EOF == input_stats(twopass, &frame_stats)) + return; + } + + // Store the bits to spend on the ARF if there is one. + if (rc->source_alt_ref_pending) { + twopass->gf_group_bit_allocation[group_frame_index++] = gf_arf_bits; + } + + // Deduct the boost bits for arf or gf if it is not a key frame. + if (rc->source_alt_ref_pending || !key_frame) + total_group_bits -= gf_arf_bits; + + // Allocate bits to the other frames in the group. + for (i = 0; i < rc->baseline_gf_interval - 1; ++i) { + if (EOF == input_stats(twopass, &frame_stats)) + break; + + modified_err = calculate_modified_err(twopass, oxcf, &frame_stats); + + if (group_error > 0) + err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error); + else + err_fraction = 0.0; + + target_frame_size = (int)((double)total_group_bits * err_fraction); + target_frame_size = clamp(target_frame_size, 0, + MIN(max_bits, (int)total_group_bits)); + + twopass->gf_group_bit_allocation[group_frame_index++] = target_frame_size; + } +} // Analyse and define a gf/arf group. static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { @@ -1509,6 +1488,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { FIRSTPASS_STATS next_frame; const FIRSTPASS_STATS *const start_pos = twopass->stats_in; int i; + double boost_score = 0.0; double old_boost_score = 0.0; double gf_group_err = 0.0; @@ -1532,14 +1512,24 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int b_boost = 0; int flash_detected; int active_max_gf_interval; + int64_t gf_group_bits; + double gf_group_error_left; + int gf_arf_bits; + + // Reset the GF group data structures unless this is a key + // frame in which case it will already have been done. + if (cpi->common.frame_type != KEY_FRAME) { + twopass->gf_group_index = 0; + vp9_zero(twopass->gf_group_bit_allocation); + } vp9_clear_system_state(); vp9_zero(next_frame); - twopass->gf_group_bits = 0; + gf_group_bits = 0; // Load stats for the current frame. - mod_frame_err = calculate_modified_err(cpi, this_frame); + mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame); // Note the error of the frame at the start of the group. This will be // the GF frame error if we code a normal gf. @@ -1571,7 +1561,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { ++i; // Accumulate error score of frames in this gf group. - mod_frame_err = calculate_modified_err(cpi, this_frame); + mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame); gf_group_err += mod_frame_err; if (EOF == input_stats(twopass, &next_frame)) @@ -1610,12 +1600,12 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } // Calculate a boost number for this frame. - boost_score += (decay_accumulator * - calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out)); + boost_score += decay_accumulator * calc_frame_boost(twopass, &next_frame, + this_frame_mv_in_out); // Break out conditions. if ( - // Break at cpi->max_gf_interval unless almost totally static. + // Break at active_max_gf_interval unless almost totally static. (i >= active_max_gf_interval && (zero_motion_accumulator < 0.995)) || ( // Don't break out with a very short interval. @@ -1646,7 +1636,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { break; if (i < rc->frames_to_key) { - mod_frame_err = calculate_modified_err(cpi, this_frame); + mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame); gf_group_err += mod_frame_err; } } @@ -1742,7 +1732,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { reset_fpf_position(twopass, start_pos); // Calculate the bits to be allocated to the gf/arf group as a whole - twopass->gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); + gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); // Calculate the extra bits to be used for boosted frame(s) { @@ -1753,19 +1743,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { boost = clamp(boost, 125, (rc->baseline_gf_interval + 1) * 200); // Calculate the extra bits to be used for boosted frame(s) - twopass->gf_bits = calculate_boost_bits(rc->baseline_gf_interval, - boost, twopass->gf_group_bits); - - - // For key frames the frame target rate is set already. - // NOTE: We dont bother to check for the special case of ARF overlay - // frames here, as there is clamping code for this in the function - // vp9_rc_clamp_pframe_target_size(), which applies to one and two pass - // encodes. - if (cpi->common.frame_type != KEY_FRAME && - !vp9_is_upper_layer_key_frame(cpi)) { - vp9_rc_set_frame_target(cpi, twopass->gf_bits); - } + gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, + boost, gf_group_bits); } // Adjust KF group bits and error remaining. @@ -1778,14 +1757,19 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // For normal GFs remove the score for the GF itself unless this is // also a key frame in which case it has already been accounted for. if (rc->source_alt_ref_pending) { - twopass->gf_group_error_left = (int64_t)(gf_group_err - mod_frame_err); + gf_group_error_left = gf_group_err - mod_frame_err; } else if (cpi->common.frame_type != KEY_FRAME) { - twopass->gf_group_error_left = (int64_t)(gf_group_err - - gf_first_frame_err); + gf_group_error_left = gf_group_err - gf_first_frame_err; } else { - twopass->gf_group_error_left = (int64_t)gf_group_err; + gf_group_error_left = gf_group_err; } + // Allocate bits to each of the frames in the GF group. + allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits); + + // Reset the file position. + reset_fpf_position(twopass, start_pos); + // Calculate a section intra ratio used in setting max loop filter. if (cpi->common.frame_type != KEY_FRAME) { twopass->section_intra_rating = @@ -1794,37 +1778,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } } -// Allocate bits to a normal frame that is neither a gf an arf or a key frame. -static void assign_std_frame_bits(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - TWO_PASS *twopass = &cpi->twopass; - // For a single frame. - const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); - // Calculate modified prediction error used in bit allocation. - const double modified_err = calculate_modified_err(cpi, this_frame); - int target_frame_size; - double err_fraction; - - if (twopass->gf_group_error_left > 0) - // What portion of the remaining GF group error is used by this frame. - err_fraction = modified_err / twopass->gf_group_error_left; - else - err_fraction = 0.0; - - // How many of those bits available for allocation should we give it? - target_frame_size = (int)((double)twopass->gf_group_bits * err_fraction); - - // Clip target size to 0 - max_bits (or cpi->twopass.gf_group_bits) at - // the top end. - target_frame_size = clamp(target_frame_size, 0, - MIN(max_bits, (int)twopass->gf_group_bits)); - - // Adjust error and bits remaining. - twopass->gf_group_error_left -= (int64_t)modified_err; - - // Per frame bit target for this frame. - vp9_rc_set_frame_target(cpi, target_frame_size); -} - static int test_candidate_kf(TWO_PASS *twopass, const FIRSTPASS_STATS *last_frame, const FIRSTPASS_STATS *this_frame, @@ -1906,10 +1859,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; const FIRSTPASS_STATS first_frame = *this_frame; const FIRSTPASS_STATS *const start_position = twopass->stats_in; FIRSTPASS_STATS next_frame; FIRSTPASS_STATS last_frame; + int kf_bits = 0; double decay_accumulator = 1.0; double zero_motion_accumulator = 1.0; double boost_score = 0.0; @@ -1921,6 +1876,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { cpi->common.frame_type = KEY_FRAME; + // Reset the GF group data structures. + twopass->gf_group_index = 0; + vp9_zero(twopass->gf_group_bit_allocation); + // Is this a forced key frame by interval. rc->this_key_frame_forced = rc->next_key_frame_forced; @@ -1935,14 +1894,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->kf_group_bits = 0; // Total bits available to kf group twopass->kf_group_error_left = 0; // Group modified error score. - kf_mod_err = calculate_modified_err(cpi, this_frame); + kf_mod_err = calculate_modified_err(twopass, oxcf, this_frame); // Find the next keyframe. i = 0; while (twopass->stats_in < twopass->stats_in_end && rc->frames_to_key < cpi->oxcf.key_freq) { // Accumulate kf group error. - kf_group_err += calculate_modified_err(cpi, this_frame); + kf_group_err += calculate_modified_err(twopass, oxcf, this_frame); // Load the next frame's stats. last_frame = *this_frame; @@ -2004,7 +1963,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Rescan to get the correct error data for the forced kf group. for (i = 0; i < rc->frames_to_key; ++i) { - kf_group_err += calculate_modified_err(cpi, &tmp_frame); + kf_group_err += calculate_modified_err(twopass, oxcf, &tmp_frame); input_stats(twopass, &tmp_frame); } rc->next_key_frame_forced = 1; @@ -2018,7 +1977,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Special case for the last key frame of the file. if (twopass->stats_in >= twopass->stats_in_end) { // Accumulate kf group error. - kf_group_err += calculate_modified_err(cpi, this_frame); + kf_group_err += calculate_modified_err(twopass, oxcf, this_frame); } // Calculate the number of bits that should be assigned to the kf group. @@ -2086,6 +2045,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { } } + reset_fpf_position(twopass, start_position); + // Store the zero motion percentage twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0); @@ -2102,13 +2063,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (rc->kf_boost < MIN_KF_BOOST) rc->kf_boost = MIN_KF_BOOST; - twopass->kf_bits = calculate_boost_bits((rc->frames_to_key - 1), - rc->kf_boost, twopass->kf_group_bits); + kf_bits = calculate_boost_bits((rc->frames_to_key - 1), + rc->kf_boost, twopass->kf_group_bits); - twopass->kf_group_bits -= twopass->kf_bits; + twopass->kf_group_bits -= kf_bits; - // Per frame bit target for this frame. - vp9_rc_set_frame_target(cpi, twopass->kf_bits); + // Save the bits to spend on the key frame. + twopass->gf_group_bit_allocation[0] = kf_bits; // Note the total error score of the kf group minus the key frame itself. twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err); @@ -2144,7 +2105,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; - int target; + int target_rate; LAYER_CONTEXT *lc = NULL; const int is_spatial_svc = (cpi->use_svc && cpi->svc.number_temporal_layers == 1); @@ -2160,16 +2121,23 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (!twopass->stats_in) return; + // Increment the gf group index. + ++twopass->gf_group_index; + + // If this is an arf frame then we dont want to read the stats file or + // advance the input pointer as we already have what we need. if (cpi->refresh_alt_ref_frame) { - int modified_target = twopass->gf_bits; - rc->base_frame_target = twopass->gf_bits; - cm->frame_type = INTER_FRAME; + int target_rate; + target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index]; + target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); + rc->base_frame_target = target_rate; #ifdef LONG_TERM_VBR_CORRECTION // Correction to rate target based on prior over or under shoot. if (cpi->oxcf.rc_mode == RC_MODE_VBR) - vbr_rate_correction(&modified_target, rc->vbr_bits_off_target); + vbr_rate_correction(&target_rate, rc->vbr_bits_off_target); #endif - vp9_rc_set_frame_target(cpi, modified_target); + vp9_rc_set_frame_target(cpi, target_rate); + cm->frame_type = INTER_FRAME; return; } @@ -2197,11 +2165,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (EOF == input_stats(twopass, &this_frame)) return; + // Local copy of the current frame's first pass stats. + this_frame_copy = this_frame; + // Keyframe and section processing. if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) { // Define next KF group and assign bits to it. - this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); } else { cm->frame_type = INTER_FRAME; @@ -2220,11 +2190,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } } - // Is this frame a GF / ARF? (Note: a key frame is always also a GF). + // Define a new GF/ARF group. (Should always enter here for key frames). if (rc->frames_till_gf_update_due == 0) { - // Define next gf group and assign bits to it. - this_frame_copy = this_frame; - #if CONFIG_MULTIPLE_ARF if (cpi->multi_arf_enabled) { define_fixed_arf_period(cpi); @@ -2247,11 +2214,6 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { rc->frames_till_gf_update_due = rc->baseline_gf_interval; cpi->refresh_golden_frame = 1; - } else { - // Otherwise this is an ordinary frame. - // Assign bits from those allocated to the GF group. - this_frame_copy = this_frame; - assign_std_frame_bits(cpi, &this_frame_copy); } { @@ -2262,18 +2224,19 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } } + target_rate = twopass->gf_group_bit_allocation[twopass->gf_group_index]; if (cpi->common.frame_type == KEY_FRAME) - target = vp9_rc_clamp_iframe_target_size(cpi, rc->this_frame_target); + target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate); else - target = vp9_rc_clamp_pframe_target_size(cpi, rc->this_frame_target); + target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); - rc->base_frame_target = target; + rc->base_frame_target = target_rate; #ifdef LONG_TERM_VBR_CORRECTION // Correction to rate target based on prior over or under shoot. if (cpi->oxcf.rc_mode == RC_MODE_VBR) - vbr_rate_correction(&target, rc->vbr_bits_off_target); + vbr_rate_correction(&target_rate, rc->vbr_bits_off_target); #endif - vp9_rc_set_frame_target(cpi, target); + vp9_rc_set_frame_target(cpi, target_rate); // Update the total stats remaining structure. subtract_stats(&twopass->total_left_stats, &this_frame); @@ -2322,8 +2285,6 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { } else { #endif twopass->kf_group_bits -= bits_used; - twopass->gf_group_bits -= bits_used; - twopass->gf_group_bits = MAX(twopass->gf_group_bits, 0); } twopass->kf_group_bits = MAX(twopass->kf_group_bits, 0); } diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index d84793e28..c89cfaf8d 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -11,6 +11,8 @@ #ifndef VP9_ENCODER_VP9_FIRSTPASS_H_ #define VP9_ENCODER_VP9_FIRSTPASS_H_ +#include "vp9/encoder/vp9_lookahead.h" + #ifdef __cplusplus extern "C" { #endif @@ -42,7 +44,6 @@ typedef struct { unsigned int section_intra_rating; unsigned int next_iiratio; FIRSTPASS_STATS total_stats; - FIRSTPASS_STATS this_frame_stats; const FIRSTPASS_STATS *stats_in; const FIRSTPASS_STATS *stats_in_start; const FIRSTPASS_STATS *stats_in_end; @@ -51,31 +52,24 @@ typedef struct { int64_t bits_left; double modified_error_min; double modified_error_max; - double modified_error_total; double modified_error_left; double kf_intra_err_min; double gf_intra_err_min; - int kf_bits; - // Remaining error from uncoded frames in a gf group. Two pass use only - int64_t gf_group_error_left; // Projected total bits available for a key frame group of frames int64_t kf_group_bits; // Error score of frames still to be coded in kf group int64_t kf_group_error_left; - - // Projected Bits available for a group of frames including 1 GF or ARF - int64_t gf_group_bits; - // Bits for the golden frame or ARF - 2 pass only - int gf_bits; - int sr_update_lag; int kf_zeromotion_pct; int gf_zeromotion_pct; int active_worst_quality; + + int gf_group_index; + int gf_group_bit_allocation[MAX_LAG_BUFFERS * 2]; } TWO_PASS; struct VP9_COMP; diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 041e583fd..842bc5b9d 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -72,8 +72,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, x->mv_row_max = tmp_row_max; return vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, - INT_MAX); + xd->plane[0].dst.buf, xd->plane[0].dst.stride); } static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, @@ -86,8 +85,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, - INT_MAX); + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; // Test last reference frame using the previous best mv as the @@ -123,8 +121,7 @@ static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) { // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride, - INT_MAX); + xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; @@ -147,7 +144,7 @@ static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) { xd->plane[0].dst.buf, xd->plane[0].dst.stride, 0, 0, 0); err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err); + xd->plane[0].dst.buf, xd->plane[0].dst.stride); // find best if (err < best_err) { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 4f7d6f17c..dbd19a2d6 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -524,9 +524,8 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, - 0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv, - sad_per_bit); + get_buf_from_mv(in_what, ref_mv), in_what->stride) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -542,7 +541,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + candidates[t][i].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -553,7 +552,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -585,7 +584,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + candidates[s][i].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -596,7 +595,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -623,7 +622,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + candidates[s][next_chkpts_indices[i]].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -634,7 +633,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -661,7 +660,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, bc + neighbors[i].col}; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } else { @@ -672,7 +671,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, bestsad); + in_what->stride); CHECK_BETTER } } @@ -894,8 +893,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, *best_mv = *ref_mv; *num00 = 11; best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, - 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); start_row = MAX(-range, x->mv_row_min - ref_mv->row); start_col = MAX(-range, x->mv_col_min - ref_mv->col); @@ -929,7 +927,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, for (i = 0; i < end_col - c; ++i) { const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -975,7 +973,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, // Check the starting position best_sad = fn_ptr->sdf(what->buf, what->stride, - best_address, in_what->stride, 0x7fffffff) + + best_address, in_what->stride) + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -986,8 +984,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, best_mv->col + ss[i].mv.col}; if (is_mv_in(x, &mv)) { int sad = fn_ptr->sdf(what->buf, what->stride, - best_address + ss[i].offset, in_what->stride, - best_sad); + best_address + ss[i].offset, in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -1012,7 +1009,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { int sad = fn_ptr->sdf(what->buf, what->stride, best_address + ss[best_site].offset, - in_what->stride, best_sad); + in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { @@ -1077,7 +1074,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, best_address = in_what; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -1129,7 +1126,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { const uint8_t *const check_here = ss[i].offset + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); + in_what_stride); if (thissad < bestsad) { thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); @@ -1154,7 +1151,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &this_mv)) { const uint8_t *const check_here = ss[best_site].offset + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, - in_what_stride, bestsad); + in_what_stride); if (thissad < bestsad) { thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); @@ -1253,7 +1250,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; @@ -1261,7 +1258,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, for (c = col_min; c < col_max; ++c) { const MV mv = {r, c}; const int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + + get_buf_from_mv(in_what, &mv), in_what->stride) + mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; @@ -1286,7 +1283,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; @@ -1320,7 +1317,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, while (c < col_max) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - check_here, in_what->stride, best_sad); + check_here, in_what->stride); if (sad < best_sad) { const MV mv = {r, c}; sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); @@ -1351,7 +1348,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; @@ -1409,7 +1406,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, while (c < col_max) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - check_here, in_what->stride, best_sad); + check_here, in_what->stride); if (sad < best_sad) { const MV mv = {r, c}; sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); @@ -1438,7 +1435,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), - in_what->stride, 0x7fffffff) + + in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -1450,7 +1447,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, ref_mv->col + neighbors[j].col}; if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { @@ -1483,7 +1480,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, - in_what->stride, 0x7fffffff) + + in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -1524,7 +1521,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), - in_what->stride, best_sad); + in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { @@ -1563,8 +1560,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride, - second_pred, 0x7fffffff) + + get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; @@ -1577,8 +1573,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, if (is_mv_in(x, &mv)) { unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, - second_pred, best_sad); + get_buf_from_mv(in_what, &mv), in_what->stride, second_pred); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 437b68078..913b8ead4 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -280,13 +280,11 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int rate_mv = 0; - if (cpi->sf.disable_inter_mode_mask[bsize] & - (1 << INTER_OFFSET(this_mode))) + if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; - if (best_rd < ((int64_t)rd_threshes[mode_idx[this_mode]] * - rd_thresh_freq_fact[this_mode] >> 5) || - rd_threshes[mode_idx[this_mode]] == INT_MAX) + if (rd_less_than_thresh(best_rd, rd_threshes[mode_idx[this_mode]], + rd_thresh_freq_fact[this_mode])) continue; if (this_mode == NEWMV) { diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index a04622c8c..b58eac981 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -200,9 +200,8 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { oxcf->best_allowed_q) / 2; } - rc->last_q[0] = oxcf->best_allowed_q; - rc->last_q[1] = oxcf->best_allowed_q; - rc->last_q[2] = oxcf->best_allowed_q; + rc->last_q[KEY_FRAME] = oxcf->best_allowed_q; + rc->last_q[INTER_FRAME] = oxcf->best_allowed_q; rc->buffer_level = oxcf->starting_buffer_level; rc->bits_off_target = oxcf->starting_buffer_level; @@ -213,6 +212,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->long_rolling_actual_bits = rc->avg_frame_bandwidth; rc->total_actual_bits = 0; + rc->total_target_bits = 0; rc->total_target_vs_actual = 0; rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; @@ -607,13 +607,27 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, return q; } +static int get_active_cq_level(const RATE_CONTROL *rc, + const VP9EncoderConfig *const oxcf) { + static const double cq_adjust_threshold = 0.5; + int active_cq_level = oxcf->cq_level; + if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY && + rc->total_target_bits > 0) { + const double x = (double)rc->total_actual_bits / rc->total_target_bits; + if (x < cq_adjust_threshold) { + active_cq_level = (int)(active_cq_level * x / cq_adjust_threshold); + } + } + return active_cq_level; +} + static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, int *bottom_index, int *top_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - const int cq_level = oxcf->cq_level; + const int cq_level = get_active_cq_level(rc, oxcf); int active_best_quality; int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi); int q; @@ -791,7 +805,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - const int cq_level = oxcf->cq_level; + const int cq_level = get_active_cq_level(rc, oxcf); int active_best_quality; int active_worst_quality = cpi->twopass.active_worst_quality; int q; @@ -1075,7 +1089,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && !(cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) { - rc->last_q[2] = qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2); } else { @@ -1375,6 +1388,24 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, return target_index - qindex; } +void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf, + RATE_CONTROL *const rc) { + // Set Maximum gf/arf interval + rc->max_gf_interval = 16; + + // Extended interval for genuinely static scenes + rc->static_scene_max_gf_interval = oxcf->key_freq >> 1; + + // Special conditions when alt ref frame enabled + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + + if (rc->max_gf_interval > rc->static_scene_max_gf_interval) + rc->max_gf_interval = rc->static_scene_max_gf_interval; +} + void vp9_rc_update_framerate(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; @@ -1399,21 +1430,5 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) { rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits); - // Set Maximum gf/arf interval - rc->max_gf_interval = 16; - - // Extended interval for genuinely static scenes - rc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1; - - // Special conditions when alt ref frame enabled in lagged compress mode - if (oxcf->play_alternate && oxcf->lag_in_frames) { - if (rc->max_gf_interval > oxcf->lag_in_frames - 1) - rc->max_gf_interval = oxcf->lag_in_frames - 1; - - if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; - } - - if (rc->max_gf_interval > rc->static_scene_max_gf_interval) - rc->max_gf_interval = rc->static_scene_max_gf_interval; + vp9_rc_set_gf_max_interval(oxcf, rc); } diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index b1cc67609..d6a0151b6 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -30,7 +30,7 @@ typedef struct { int this_frame_target; // Actual frame target after rc adjustment. int projected_frame_size; int sb64_target_rate; - int last_q[3]; // Separate values for Intra/Inter/ARF-GF + int last_q[FRAME_TYPES]; // Separate values for Intra/Inter int last_boosted_qindex; // Last boosted GF/KF/ARF q int gfu_boost; @@ -178,6 +178,9 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, void vp9_rc_update_framerate(struct VP9_COMP *cpi); +void vp9_rc_set_gf_max_interval(const struct VP9EncoderConfig *const oxcf, + RATE_CONTROL *const rc); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 601e64d39..f68aa2738 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -9,7 +9,6 @@ */ #include <assert.h> -#include <limits.h> #include <math.h> #include <stdio.h> @@ -22,7 +21,6 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_mvref_common.h" -#include "vp9/common/vp9_pragmas.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconinter.h" @@ -1677,9 +1675,9 @@ static INLINE int mv_has_subpel(const MV *mv) { static int check_best_zero_mv( const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], - int disable_inter_mode_mask, int this_mode, + int inter_mode_mask, int this_mode, const MV_REFERENCE_FRAME ref_frames[2]) { - if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && + if ((inter_mode_mask & (1 << ZEROMV)) && (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && frame_mv[this_mode][ref_frames[0]].as_int == 0 && (ref_frames[1] == NONE || @@ -1745,7 +1743,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; const int has_second_rf = has_second_ref(mbmi); - const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; + const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; vp9_zero(*bsi); @@ -1794,11 +1792,11 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, mode_idx = INTER_OFFSET(this_mode); bsi->rdstat[i][mode_idx].brdcost = INT64_MAX; - if (disable_inter_mode_mask & (1 << mode_idx)) + if (!(inter_mode_mask & (1 << this_mode))) continue; if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - disable_inter_mode_mask, + inter_mode_mask, this_mode, mbmi->ref_frame)) continue; @@ -2131,8 +2129,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, // Find sad for current vector. this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, - ref_y_ptr, ref_y_stride, - 0x7fffffff); + ref_y_ptr, ref_y_stride); // Note if it is the best so far. if (this_sad < best_sad) { @@ -2210,8 +2207,6 @@ static void estimate_ref_frame_costs(const VP9_COMMON *cm, static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, - int_mv *ref_mv, - int_mv *second_ref_mv, int64_t comp_pred_diff[REFERENCE_MODES], const int64_t tx_size_diff[TX_MODES], int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { @@ -2991,11 +2986,6 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, ctx->mic = *xd->mi[0]; } -static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, - int thresh_fact) { - return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; -} - // Updating rd_thresh_freq_fact[] here means that the different // partition/block sizes are handled independently based on the best // choice for the current partition. It may well be better to keep a scaled @@ -3073,7 +3063,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; const int intra_y_mode_mask = cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]; - int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize]; + int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; vp9_zero(best_mbmode); x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -3139,6 +3129,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { const int inter_non_zero_mode_mask = 0x1F7F7; mode_skip_mask |= inter_non_zero_mode_mask; + mode_skip_mask &= ~(1 << THR_ZEROMV); + inter_mode_mask = (1 << ZEROMV); } // Disable this drop out case if the ref frame @@ -3172,7 +3164,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } if (bsize > cpi->sf.max_intra_bsize) { - mode_skip_mask |= 0xFF30808; + const int all_intra_modes = (1 << THR_DC) | (1 << THR_TM) | + (1 << THR_H_PRED) | (1 << THR_V_PRED) | (1 << THR_D135_PRED) | + (1 << THR_D207_PRED) | (1 << THR_D153_PRED) | (1 << THR_D63_PRED) | + (1 << THR_D117_PRED) | (1 << THR_D45_PRED); + mode_skip_mask |= all_intra_modes; } if (!x->in_active_map) { @@ -3186,7 +3182,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode_index = THR_ZEROMV; mode_skip_mask = ~(1 << mode_index); mode_skip_start = MAX_MODES; - disable_inter_mode_mask = 0; + inter_mode_mask = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | + (1 << NEWMV); } for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { @@ -3228,13 +3225,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Test best rd so far against threshold for trying this mode. if (rd_less_than_thresh(best_rd, rd_threshes[mode_index], - rd_thresh_freq_fact[mode_index])) + rd_thresh_freq_fact[mode_index])) continue; this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame[0]; - if (ref_frame != INTRA_FRAME && - disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode))) + if (ref_frame != INTRA_FRAME && !(inter_mode_mask & (1 << this_mode))) continue; second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; @@ -3283,7 +3279,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - disable_inter_mode_mask, this_mode, ref_frames)) + inter_mode_mask, this_mode, ref_frames)) continue; } } @@ -3621,9 +3617,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], - &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : - mbmi->ref_frame[1]][0], best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; @@ -3672,7 +3665,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int_mv seg_mvs[4][MAX_REF_FRAMES]; b_mode_info best_bmodes[4]; int best_skip2 = 0; - int ref_frame_mask = 0; int mode_skip_mask = 0; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -3707,17 +3699,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][ref_frame].as_int = 0; } - for (ref_frame = LAST_FRAME; - ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) { - int i; - for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { - if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) { - ref_frame_mask |= (1 << ref_frame); - break; - } - } - } - for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { int mode_excluded = 0; int64_t this_rd = INT64_MAX; @@ -3812,11 +3793,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { continue; - // If the segment skip feature is enabled.... - // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) && - ref_frame != INTRA_FRAME) { - continue; // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. @@ -4041,15 +4017,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - // Is Mb level skip allowed (i.e. not coded at segment level). - const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id, - SEG_LVL_SKIP); + // Skip is never coded at the segment level for sub8x8 blocks and instead + // always coded in the bitstream at the mode info level. - if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) { + if (ref_frame != INTRA_FRAME && !xd->lossless) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { // Add in the cost of the no skip flag. @@ -4064,7 +4035,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate_uv = 0; this_skip2 = 1; } - } else if (mb_skip_allowed) { + } else { // Add in the cost of the no skip flag. rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } @@ -4254,9 +4225,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_ref_index, - &mbmi->ref_mvs[mbmi->ref_frame[0]][0], - &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : - mbmi->ref_frame[1]][0], best_pred_diff, best_tx_diff, best_filter_diff); return best_rd; diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index b6b51e553..5ea09a8a7 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -11,6 +11,8 @@ #ifndef VP9_ENCODER_VP9_RDOPT_H_ #define VP9_ENCODER_VP9_RDOPT_H_ +#include <limits.h> + #include "vp9/encoder/vp9_encoder.h" #ifdef __cplusplus @@ -87,6 +89,11 @@ void vp9_set_rd_speed_thresholds(VP9_COMP *cpi); void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi); +static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, + int thresh_fact) { + return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; +} + static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int error_per_bit, diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c index 892e90551..d06263676 100644 --- a/vp9/encoder/vp9_sad.c +++ b/vp9/encoder/vp9_sad.c @@ -35,14 +35,12 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride, #define sadMxN(m, n) \ unsigned int vp9_sad##m##x##n##_c(const uint8_t *src, int src_stride, \ - const uint8_t *ref, int ref_stride, \ - unsigned int max_sad) { \ + const uint8_t *ref, int ref_stride) { \ return sad(src, src_stride, ref, ref_stride, m, n); \ } \ unsigned int vp9_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ - const uint8_t *second_pred, \ - unsigned int max_sad) { \ + const uint8_t *second_pred) { \ uint8_t comp_pred[m * n]; \ vp9_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ return sad(src, src_stride, comp_pred, m, m, n); \ @@ -54,8 +52,7 @@ void vp9_sad##m##x##n##x##k##_c(const uint8_t *src, int src_stride, \ unsigned int *sads) { \ int i; \ for (i = 0; i < k; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride, \ - 0x7fffffff); \ + sads[i] = vp9_sad##m##x##n##_c(src, src_stride, &ref[i], ref_stride); \ } #define sadMxNx4D(m, n) \ @@ -64,8 +61,7 @@ void vp9_sad##m##x##n##x4d_c(const uint8_t *src, int src_stride, \ unsigned int *sads) { \ int i; \ for (i = 0; i < 4; ++i) \ - sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride, \ - 0x7fffffff); \ + sads[i] = vp9_sad##m##x##n##_c(src, src_stride, refs[i], ref_stride); \ } // 64x64 diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 7b2d1e2f0..15b986197 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -14,20 +14,23 @@ #include "vp9/encoder/vp9_speed_features.h" enum { - ALL_INTRA_MODES = (1 << DC_PRED) | + INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) | (1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) | (1 << D207_PRED) | (1 << D63_PRED) | (1 << TM_PRED), - - INTRA_DC_ONLY = (1 << DC_PRED), - - INTRA_DC_TM = (1 << TM_PRED) | (1 << DC_PRED), - + INTRA_DC = (1 << DC_PRED), + INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED), INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED), + INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | + (1 << H_PRED) +}; - INTRA_DC_TM_H_V = INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED) +enum { + INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV), + INTER_NEAREST = (1 << NEARESTMV), + INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) }; enum { @@ -80,12 +83,16 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, } if (speed >= 2) { - if (MIN(cm->width, cm->height) >= 720) + if (MIN(cm->width, cm->height) >= 720) { + sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; + sf->last_partitioning_redo_frequency = 3; sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - else + } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - + sf->last_partitioning_redo_frequency = 2; + sf->lf_motion_threshold = NO_MOITION_THRESHOLD; + } sf->adaptive_pred_interp_filter = 2; sf->reference_masking = 1; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH | @@ -97,7 +104,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->adjust_partitioning_from_last_frame = 1; - sf->last_partitioning_redo_frequency = 3; } if (speed >= 3) { @@ -108,6 +114,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, else sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; + sf->last_partitioning_redo_frequency = 3; sf->recode_loop = ALLOW_RECODE_KFMAXBW; sf->adaptive_rd_thresh = 3; sf->mode_skip_start = 6; @@ -135,20 +143,23 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->search_method = HEX; sf->disable_filter_search_var_thresh = 500; for (i = 0; i < TX_SIZES; ++i) { - sf->intra_y_mode_mask[i] = INTRA_DC_ONLY; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_y_mode_mask[i] = INTRA_DC; + sf->intra_uv_mode_mask[i] = INTRA_DC; } cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; } } -static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, +static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed) { + VP9_COMMON *const cm = &cpi->common; + const int frames_since_key = + cm->frame_type == KEY_FRAME ? 0 : cpi->rc.frames_since_key; sf->static_segmentation = 0; sf->adaptive_rd_thresh = 1; sf->use_fast_coef_costing = 1; - if (speed == 1) { + if (speed >= 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); sf->less_rectangular_check = 1; sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD @@ -171,13 +182,9 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, } if (speed >= 2) { - sf->use_square_partition_only = !frame_is_intra_only(cm); - sf->less_rectangular_check = 1; - sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD - : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) - sf->disable_split_mask = cm->show_frame ? - DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; + sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT + : DISABLE_ALL_INTER_SPLIT; else sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; @@ -185,27 +192,18 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR; - sf->use_rd_breakout = 1; - sf->adaptive_motion_search = 1; sf->adaptive_pred_interp_filter = 2; - sf->auto_mv_step_size = 1; sf->reference_masking = 1; - sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; + sf->lf_motion_threshold = LOW_MOITION_THRESHOLD; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; - - sf->adaptive_rd_thresh = 2; sf->use_lp32x32fdct = 1; sf->mode_skip_start = 11; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; } if (speed >= 3) { @@ -233,19 +231,19 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, sf->auto_min_max_partition_size = STRICT_NEIGHBORING_MIN_MAX; sf->adjust_partitioning_from_last_frame = cm->last_frame_type != cm->frame_type || (0 == - (cm->current_video_frame + 1) % sf->last_partitioning_redo_frequency); + (frames_since_key + 1) % sf->last_partitioning_redo_frequency); sf->subpel_force_stop = 1; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; + sf->intra_uv_mode_mask[i] = INTRA_DC; } - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_ONLY; + sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->frame_parameter_update = 0; sf->search_method = FAST_HEX; - sf->disable_inter_mode_mask[BLOCK_32X32] = 1 << INTER_OFFSET(ZEROMV); - sf->disable_inter_mode_mask[BLOCK_32X64] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X32] = ~(1 << INTER_OFFSET(NEARESTMV)); - sf->disable_inter_mode_mask[BLOCK_64X64] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW; + sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST; + sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST; + sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST; sf->max_intra_bsize = BLOCK_32X32; sf->allow_skip_recode = 1; } @@ -254,9 +252,9 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, sf->max_partition_size = BLOCK_32X32; sf->min_partition_size = BLOCK_8X8; sf->partition_check = - (cm->current_video_frame % sf->last_partitioning_redo_frequency == 1); + (frames_since_key % sf->last_partitioning_redo_frequency == 1); sf->force_frame_boost = cm->frame_type == KEY_FRAME || - (cm->current_video_frame % + (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1); sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15; sf->partition_search_type = REFERENCE_PARTITION; @@ -276,7 +274,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, if (speed >= 7) { int i; for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = ~(1 << INTER_OFFSET(NEARESTMV)); + sf->inter_mode_mask[i] = INTER_NEAREST; } } @@ -321,8 +319,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_split_var_thresh = 0; sf->disable_filter_search_var_thresh = 0; for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = ALL_INTRA_MODES; - sf->intra_uv_mode_mask[i] = ALL_INTRA_MODES; + sf->intra_y_mode_mask[i] = INTRA_ALL; + sf->intra_uv_mode_mask[i] = INTRA_ALL; } sf->use_rd_breakout = 0; sf->skip_encode_sb = 0; @@ -334,7 +332,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set sf->use_nonrd_pick_mode = 0; for (i = 0; i < BLOCK_SIZES; ++i) - sf->disable_inter_mode_mask[i] = 0; + sf->inter_mode_mask[i] = INTER_ALL; sf->max_intra_bsize = BLOCK_64X64; // This setting only takes effect when partition_search_type is set // to FIXED_PARTITION. @@ -356,7 +354,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { set_good_speed_feature(cpi, cm, sf, oxcf->speed); break; case REALTIME: - set_rt_speed_feature(cm, sf, oxcf->speed); + set_rt_speed_feature(cpi, sf, oxcf->speed); break; } diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index d8c1a8be2..3e7cd27d8 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -44,6 +44,11 @@ typedef enum { } SUBPEL_SEARCH_METHODS; typedef enum { + NO_MOITION_THRESHOLD = 0, + LOW_MOITION_THRESHOLD = 7 +} MOTION_THRESHOLD; + +typedef enum { LAST_FRAME_PARTITION_OFF = 0, LAST_FRAME_PARTITION_LOW_MOTION = 1, LAST_FRAME_PARTITION_ALL = 2 @@ -200,6 +205,10 @@ typedef struct SPEED_FEATURES { // partitioning. LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning; + // The threshold is to determine how slow the motino is, it is used when + // use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION + MOTION_THRESHOLD lf_motion_threshold; + // Determine which method we use to determine transform size. We can choose // between options like full rd, largest for prediction size, largest // for intra and model coefs for the rest. @@ -322,8 +331,8 @@ typedef struct SPEED_FEATURES { int use_nonrd_pick_mode; // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV - // modes are disabled in order from LSB to MSB for each BLOCK_SIZE. - int disable_inter_mode_mask[BLOCK_SIZES]; + // modes are used in order from LSB to MSB for each BLOCK_SIZE. + int inter_mode_mask[BLOCK_SIZES]; // This feature controls whether we do the expensive context update and // calculation in the rd coefficient costing loop. diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index dd28496be..c25314b42 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -46,13 +46,12 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { lrc->key_frame_rate_correction_factor = 1.0; if (svc->number_temporal_layers > 1) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; - lrc->last_q[0] = oxcf->best_allowed_q; - lrc->last_q[1] = oxcf->best_allowed_q; - lrc->last_q[2] = oxcf->best_allowed_q; + lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; + lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; + lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; } lrc->buffer_level = vp9_rescale((int)(oxcf->starting_buffer_level), @@ -82,9 +81,9 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, RATE_CONTROL *const lrc = &lc->rc; if (svc->number_temporal_layers > 1) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer] * 1000; + lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; } bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; // Update buffer-related quantities. @@ -132,8 +131,7 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { } else { const double prev_layer_framerate = oxcf->framerate / oxcf->ts_rate_decimator[layer - 1]; - const int prev_layer_target_bandwidth = - oxcf->ts_target_bitrate[layer - 1] * 1000; + const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1]; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); @@ -151,20 +149,7 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { oxcf->two_pass_vbrmin_section / 100); lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / 100); - lrc->max_gf_interval = 16; - - lrc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1; - - if (oxcf->play_alternate && oxcf->lag_in_frames) { - if (lrc->max_gf_interval > oxcf->lag_in_frames - 1) - lrc->max_gf_interval = oxcf->lag_in_frames - 1; - - if (lrc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - lrc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; - } - - if (lrc->max_gf_interval > lrc->static_scene_max_gf_interval) - lrc->max_gf_interval = lrc->static_scene_max_gf_interval; + vp9_rc_set_gf_max_interval(oxcf, lrc); } void vp9_restore_layer_context(VP9_COMP *const cpi) { diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 91d8ea4dc..eb5ae2e41 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -156,6 +156,18 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ } +void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); +} + +void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, + const uint8_t *ref_ptr, int ref_stride, + unsigned int *sse, int *sum) { + variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); +} + unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index c47fe1335..4a194b72c 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -25,15 +25,13 @@ void variance(const uint8_t *a, int a_stride, typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, - int ref_stride, - unsigned int max_sad); + int ref_stride); typedef unsigned int(*vp9_sad_avg_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, - const uint8_t *second_pred, - unsigned int max_sad); + const uint8_t *second_pred); typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr, int source_stride, diff --git a/vp9/encoder/x86/vp9_dct_ssse3.asm b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm index 8723a7114..8723a7114 100644 --- a/vp9/encoder/x86/vp9_dct_ssse3.asm +++ b/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm index 48ccef8cc..48ccef8cc 100644 --- a/vp9/encoder/x86/vp9_quantize_ssse3.asm +++ b/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm diff --git a/vp9/encoder/x86/vp9_ssim_opt.asm b/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm index 455d10d2c..455d10d2c 100644 --- a/vp9/encoder/x86/vp9_ssim_opt.asm +++ b/vp9/encoder/x86/vp9_ssim_opt_x86_64.asm diff --git a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm index 673e0b3a6..21aaa9383 100644 --- a/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm +++ b/vp9/encoder/x86/vp9_temporal_filter_apply_sse2.asm @@ -43,9 +43,9 @@ sym(vp9_temporal_filter_apply_sse2): mov [rsp + rbp_backup], rbp ; end prolog - mov rdx, arg(3) + mov edx, arg(3) mov [rsp + block_width], rdx - mov rdx, arg(4) + mov edx, arg(4) mov [rsp + block_height], rdx movd xmm6, arg(5) movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read diff --git a/vp9/encoder/x86/vp9_variance_avx2.c b/vp9/encoder/x86/vp9_variance_avx2.c index 835c51957..7f81f46b8 100644 --- a/vp9/encoder/x86/vp9_variance_avx2.c +++ b/vp9/encoder/x86/vp9_variance_avx2.c @@ -10,7 +10,6 @@ #include "./vpx_config.h" #include "vp9/encoder/vp9_variance.h" -#include "vp9/common/vp9_pragmas.h" #include "vpx_ports/mem.h" typedef void (*get_var_avx2) ( diff --git a/vp9/encoder/x86/vp9_variance_mmx.c b/vp9/encoder/x86/vp9_variance_mmx.c index c4d17fc0f..ae2f976af 100644 --- a/vp9/encoder/x86/vp9_variance_mmx.c +++ b/vp9/encoder/x86/vp9_variance_mmx.c @@ -10,7 +10,6 @@ #include "./vpx_config.h" #include "vp9/encoder/vp9_variance.h" -#include "vp9/common/vp9_pragmas.h" #include "vpx_ports/mem.h" extern unsigned int vp9_get8x8var_mmx diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c index 41f225922..e935a233a 100644 --- a/vp9/encoder/x86/vp9_variance_sse2.c +++ b/vp9/encoder/x86/vp9_variance_sse2.c @@ -11,53 +11,29 @@ #include "./vpx_config.h" #include "vp9/encoder/vp9_variance.h" -#include "vp9/common/vp9_pragmas.h" #include "vpx_ports/mem.h" -extern unsigned int vp9_get4x4var_mmx -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -unsigned int vp9_get16x16var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); -unsigned int vp9_get8x8var_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -typedef unsigned int (*get_var_sse2) ( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum -); - -static void variance_sse2(const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, int recon_stride, - int w, int h, unsigned int *sse, int *sum, - get_var_sse2 var_fn, int block_size) { - unsigned int sse0; - int sum0; +typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + +unsigned int vp9_get4x4var_mmx(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + + +unsigned int vp9_get8x8var_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + +unsigned int vp9_get16x16var_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse, int *sum); + +static void variance_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + int w, int h, unsigned int *sse, int *sum, + variance_fn_t var_fn, int block_size) { int i, j; *sse = 0; @@ -65,217 +41,139 @@ static void variance_sse2(const unsigned char *src_ptr, int source_stride, for (i = 0; i < h; i += block_size) { for (j = 0; j < w; j += block_size) { - var_fn(src_ptr + source_stride * i + j, source_stride, - ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0); + unsigned int sse0; + int sum0; + var_fn(src + src_stride * i + j, src_stride, + ref + ref_stride * i + j, ref_stride, &sse0, &sum0); *sse += sse0; *sum += sum0; } } } -unsigned int vp9_variance4x4_sse2( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, - &var, &avg, vp9_get4x4var_mmx, 4); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); +unsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 4, 4, + sse, &sum, vp9_get4x4var_mmx, 4); + return *sse - (((unsigned int)sum * sum) >> 4); } -unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, - &var, &avg, vp9_get4x4var_mmx, 4); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 5)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 4, + sse, &sum, vp9_get4x4var_mmx, 4); + return *sse - (((unsigned int)sum * sum) >> 5); } -unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, - &var, &avg, vp9_get4x4var_mmx, 4); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 5)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 4, 8, + sse, &sum, vp9_get4x4var_mmx, 4); + return *sse - (((unsigned int)sum * sum) >> 5); } -unsigned int vp9_variance8x8_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, - &var, &avg, vp9_get8x8var_sse2, 8); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 6)); +unsigned int vp9_variance8x8_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 8, + sse, &sum, vp9_get8x8var_sse2, 8); + return *sse - (((unsigned int)sum * sum) >> 6); } -unsigned int vp9_variance16x8_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, - &var, &avg, vp9_get8x8var_sse2, 8); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); +unsigned int vp9_variance16x8_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 8, + sse, &sum, vp9_get8x8var_sse2, 8); + return *sse - (((unsigned int)sum * sum) >> 7); } -unsigned int vp9_variance8x16_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, - &var, &avg, vp9_get8x8var_sse2, 8); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); +unsigned int vp9_variance8x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 8, 16, + sse, &sum, vp9_get8x8var_sse2, 8); + return *sse - (((unsigned int)sum * sum) >> 7); } -unsigned int vp9_variance16x16_sse2 -( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 8)); +unsigned int vp9_variance16x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 16, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((unsigned int)sum * sum) >> 8); } -unsigned int vp9_mse16x16_sse2( - const unsigned char *src_ptr, - int source_stride, - const unsigned char *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int sse0; - int sum0; - vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, - &sum0); - *sse = sse0; - return sse0; +unsigned int vp9_mse16x16_sse2(const unsigned char *src, int src_stride, + const unsigned char *ref, int ref_stride, + unsigned int *sse) { + int sum; + vp9_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + return *sse; } -unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance32x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 10)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 32, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 10); } -unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance32x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 9)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 16, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 9); } -unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance16x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 9)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 16, 32, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 9); } -unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance64x64_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 12)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 64, 64, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 12); } -unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance64x32_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 11)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 64, 32, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 11); } -unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_variance32x64_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, - &var, &avg, vp9_get16x16var_sse2, 16); - *sse = var; - return (var - (((int64_t)avg * avg) >> 11)); + int sum; + variance_sse2(src, src_stride, ref, ref_stride, 32, 64, + sse, &sum, vp9_get16x16var_sse2, 16); + return *sse - (((int64_t)sum * sum) >> 11); } #define DECL(w, opt) \ |