diff options
Diffstat (limited to 'vp9')
30 files changed, 731 insertions, 873 deletions
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index 1aab36205..61682c42d 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -195,7 +195,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, int block, int mi_row, int mi_col) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; - const MODE_INFO *prev_mi = cm->prev_mi + const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col] : NULL; const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL; diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index fe9cc9e6a..20de43414 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -120,7 +120,6 @@ typedef struct VP9Common { // frame header, 3 reset all contexts. int reset_frame_context; - int frame_flags; // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in // MODE_INFO (8-pixel) units. int MBs; @@ -284,15 +283,15 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, xd->left_available = (mi_col > tile->mi_col_start); } -static INLINE MODE_INFO *get_prev_mi(VP9_COMMON *cm) { - const int use_prev_mi = cm->coding_use_prev_mi && - cm->width == cm->last_width && - cm->height == cm->last_height && - !cm->intra_only && - cm->last_show_frame; +static INLINE void set_prev_mi(VP9_COMMON *cm) { + const int use_prev_in_find_mv_refs = cm->width == cm->last_width && + cm->height == cm->last_height && + !cm->intra_only && + cm->last_show_frame; // Special case: set prev_mi to NULL when the previous mode info // context cannot be used. - return use_prev_mi ? &cm->prev_mip[cm->mi_stride + 1] : NULL; + cm->prev_mi = use_prev_in_find_mv_refs ? 
+ cm->prev_mip + cm->mi_stride + 1 : NULL; } static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index b45559245..8a8155410 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -13,7 +13,6 @@ struct macroblockd; struct macroblock; struct vp9_variance_vtable; -#define DEC_MVCOSTS int *mvjcost, int *mvcost[2] struct mv; union int_mv; struct yv12_buffer_config; @@ -758,20 +757,20 @@ specialize qw/vp9_fdct32x32_rd sse2 avx2/; # # Motion search # -add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv"; +add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv"; specialize qw/vp9_full_search_sad sse3 sse4_1/; $vp9_full_search_sad_sse3=vp9_full_search_sadx3; $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8; -add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize qw/vp9_refining_search_sad sse3/; $vp9_refining_search_sad_sse3=vp9_refining_search_sadx4; -add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv 
*best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize qw/vp9_diamond_search_sad sse3/; $vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4; -add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv"; +add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize qw/vp9_full_range_search/; add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; diff --git a/vp9/common/x86/vp9_copy_sse2.asm b/vp9/common/x86/vp9_copy_sse2.asm index dd522c698..b26383708 100644 --- a/vp9/common/x86/vp9_copy_sse2.asm +++ b/vp9/common/x86/vp9_copy_sse2.asm @@ -133,10 +133,14 @@ INIT_MMX sse movh m3, [srcq+r5q] lea srcq, [srcq+src_strideq*4] %ifidn %1, avg - pavgb m0, [dstq] - pavgb m1, [dstq+dst_strideq] - pavgb m2, [dstq+dst_strideq*2] - pavgb m3, [dstq+r6q] + movh m4, [dstq] + movh m5, [dstq+dst_strideq] + movh m6, [dstq+dst_strideq*2] + movh m7, [dstq+r6q] + pavgb m0, m4 + pavgb m1, m5 + pavgb m2, m6 + pavgb m3, m7 %endif movh [dstq ], m0 movh [dstq+dst_strideq ], m1 diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 9b63961f0..022a4296f 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -1296,7 +1296,11 @@ int vp9_decode_frame(VP9Decoder *pbi, } init_macroblockd(cm, &pbi->mb); - cm->prev_mi = get_prev_mi(cm); + + if (cm->coding_use_prev_mi) + set_prev_mi(cm); + else + cm->prev_mi = NULL; setup_plane_dequants(cm, xd, cm->base_qindex); 
vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index fd74478e9..56dbc99ed 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -361,7 +361,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, // If multiple threads are used to decode tiles, then we use those threads // to do parallel loopfiltering. if (pbi->num_tile_workers) { - vp9_loop_filter_frame_mt(pbi, cm, &pbi->mb, cm->lf.filter_level, 0, 0); + vp9_loop_filter_frame_mt(pbi, cm, cm->lf.filter_level, 0, 0); } else { vp9_loop_filter_frame(cm, &pbi->mb, cm->lf.filter_level, 0, 0); } diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index c9dc25191..b8250c2bb 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -49,9 +49,6 @@ typedef struct VP9Decoder { int decoded_key_frame; - int initial_width; - int initial_height; - int do_loopfilter_inline; // apply loopfilter to available rows immediately VP9Worker lf_worker; diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c index 9b124c9d9..9098063ce 100644 --- a/vp9/decoder/vp9_dthread.c +++ b/vp9/decoder/vp9_dthread.c @@ -40,13 +40,13 @@ static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { const int nsync = lf_sync->sync_range; if (r && !(c & (nsync - 1))) { - mutex_lock(&lf_sync->mutex_[r - 1]); + pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1]; + mutex_lock(mutex); while (c > lf_sync->cur_sb_col[r - 1] - nsync) { - pthread_cond_wait(&lf_sync->cond_[r - 1], - &lf_sync->mutex_[r - 1]); + pthread_cond_wait(&lf_sync->cond_[r - 1], mutex); } - pthread_mutex_unlock(&lf_sync->mutex_[r - 1]); + pthread_mutex_unlock(mutex); } #else (void)lf_sync; @@ -94,21 +94,21 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer, VP9LfSync *const lf_sync, int num_lf_workers) { const int num_planes = y_only ? 
1 : MAX_MB_PLANE; int r, c; // SB row and col - LOOP_FILTER_MASK lfm; const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; for (r = start; r < stop; r += num_lf_workers) { const int mi_row = r << MI_BLOCK_SIZE_LOG2; - MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride; + MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; for (c = 0; c < sb_cols; ++c) { const int mi_col = c << MI_BLOCK_SIZE_LOG2; + LOOP_FILTER_MASK lfm; int plane; sync_read(lf_sync, r, c); vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col); - vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm); + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); for (plane = 0; plane < num_planes; ++plane) { vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm); @@ -134,9 +134,9 @@ static int loop_filter_row_worker(void *arg1, void *arg2) { // threads. void vp9_loop_filter_frame_mt(VP9Decoder *pbi, VP9_COMMON *cm, - MACROBLOCKD *xd, int frame_filter_level, int y_only, int partial_frame) { + VP9LfSync *const lf_sync = &pbi->lf_row_sync; // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; const int tile_cols = 1 << cm->log2_tile_cols; @@ -146,8 +146,6 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi, // Allocate memory used in thread synchronization. // This always needs to be done even if frame_filter_level is 0. if (!cm->current_video_frame || cm->last_height != cm->height) { - VP9LfSync *const lf_sync = &pbi->lf_row_sync; - if (cm->last_height != cm->height) { const int aligned_last_height = ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2); @@ -166,8 +164,7 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi, vp9_loop_filter_frame_init(cm, frame_filter_level); // Initialize cur_sb_col to -1 for all SB rows. 
- vpx_memset(pbi->lf_row_sync.cur_sb_col, -1, - sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows); + vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); // Set up loopfilter thread data. // The decoder is using num_workers instead of pbi->num_tile_workers @@ -194,7 +191,7 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi, lf_data->stop = sb_rows; lf_data->y_only = y_only; // always do all planes in decoder - lf_data->lf_sync = &pbi->lf_row_sync; + lf_data->lf_sync = lf_sync; lf_data->num_lf_workers = num_workers; // Start loopfiltering @@ -253,8 +250,12 @@ void vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows, // Deallocate lf synchronization related mutex and data void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) { -#if CONFIG_MULTITHREAD +#if !CONFIG_MULTITHREAD + (void)rows; +#endif // !CONFIG_MULTITHREAD + if (lf_sync != NULL) { +#if CONFIG_MULTITHREAD int i; if (lf_sync->mutex_ != NULL) { @@ -269,17 +270,10 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) { } vpx_free(lf_sync->cond_); } - +#endif // CONFIG_MULTITHREAD vpx_free(lf_sync->cur_sb_col); // clear the structure as the source of this call may be a resize in which // case this call will be followed by an _alloc() which may fail. 
- vpx_memset(lf_sync, 0, sizeof(*lf_sync)); + vp9_zero(*lf_sync); } -#else - (void)rows; - if (lf_sync != NULL) { - vpx_free(lf_sync->cur_sb_col); - vpx_memset(lf_sync, 0, sizeof(*lf_sync)); - } -#endif // CONFIG_MULTITHREAD } diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h index 005bd7bbd..8738ceebd 100644 --- a/vp9/decoder/vp9_dthread.h +++ b/vp9/decoder/vp9_dthread.h @@ -12,11 +12,9 @@ #define VP9_DECODER_VP9_DTHREAD_H_ #include "./vpx_config.h" -#include "vp9/common/vp9_loopfilter.h" #include "vp9/decoder/vp9_reader.h" #include "vp9/decoder/vp9_thread.h" -struct macroblockd; struct VP9Common; struct VP9Decoder; @@ -43,16 +41,15 @@ typedef struct VP9LfSyncData { } VP9LfSync; // Allocate memory for loopfilter row synchronization. -void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync, +void vp9_loop_filter_alloc(struct VP9Common *cm, VP9LfSync *lf_sync, int rows, int width); // Deallocate loopfilter synchronization related mutex and data. -void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows); +void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows); // Multi-threaded loopfilter that uses the tile threads. void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi, struct VP9Common *cm, - struct macroblockd *xd, int frame_filter_level, int y_only, int partial_frame); diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 787909142..e55881ffc 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -200,6 +200,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { // Rate target ratio to set q delta. const float rate_ratio_qdelta = 2.0; + const double q = vp9_convert_qindex_to_q(cm->base_qindex); vp9_clear_system_state(); // Some of these parameters may be set via codec-control function later. 
cr->max_sbs_perframe = 10; @@ -209,14 +210,12 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { // Set rate threshold to some fraction of target (and scaled by 256). cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2; // Distortion threshold, quadratic in Q, scale factor to be adjusted. - cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * - vp9_convert_qindex_to_q(cm->base_qindex)); + cr->thresh_dist_sb = 8 * (int)(q * q); if (cpi->sf.use_nonrd_pick_mode) { // May want to be more conservative with thresholds in non-rd mode for now // as rate/distortion are derived from model based on prediction residual. cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3; - cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) * - vp9_convert_qindex_to_q(cm->base_qindex)); + cr->thresh_dist_sb = 4 * (int)(q * q); } cr->num_seg_blocks = 0; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 61a5022ec..70b70fec2 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -103,34 +103,31 @@ static const uint8_t VP9_VAR_OFFS[64] = { }; static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, - MACROBLOCK *x, + const struct buf_2d *ref, BLOCK_SIZE bs) { - unsigned int var, sse; - var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, - VP9_VAR_OFFS, 0, &sse); + unsigned int sse; + const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, + VP9_VAR_OFFS, 0, &sse); return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); } static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi, - MACROBLOCK *x, - int mi_row, - int mi_col, + const struct buf_2d *ref, + int mi_row, int mi_col, BLOCK_SIZE bs) { - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - int offset = (mi_row * MI_SIZE) * yv12->y_stride + (mi_col * MI_SIZE); - unsigned int var, sse; - var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, - x->plane[0].src.stride, - 
yv12->y_buffer + offset, - yv12->y_stride, - &sse); + const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME); + const uint8_t* last_y = &last->y_buffer[mi_row * MI_SIZE * last->y_stride + + mi_col * MI_SIZE]; + unsigned int sse; + const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, + last_y, last->y_stride, &sse); return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); } static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, int mi_row, int mi_col) { - unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb, + unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src, mi_row, mi_col, BLOCK_64X64); if (var < 8) @@ -146,7 +143,7 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi, int mi_row, int mi_col) { - unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb, + unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src, mi_row, mi_col, BLOCK_64X64); if (var < 4) @@ -246,8 +243,8 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); // R/D setup. - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; + x->rddiv = cpi->rd.RDDIV; + x->rdmult = cpi->rd.RDMULT; // Setup segment ID. 
if (seg->enabled) { @@ -822,6 +819,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int output_enabled) { int i, x_idx, y; VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = x->plane; @@ -907,7 +905,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { for (i = 0; i < TX_MODES; i++) - cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i]; + rd_opt->tx_select_diff[i] += ctx->tx_rd_diff[i]; } #if CONFIG_INTERNAL_STATS @@ -940,12 +938,12 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } } - cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; - cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; - cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; + rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; + rd_opt->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; + rd_opt->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - cpi->rd_filter_diff[i] += ctx->best_filter_diff[i]; + rd_opt->filter_diff[i] += ctx->best_filter_diff[i]; } } @@ -1013,12 +1011,16 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, // Set to zero to make sure we do not use the previous encoded frame stats mbmi->skip = 0; - x->source_variance = get_sby_perpixel_variance(cpi, x, bsize); + x->source_variance = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); + + // Save rdmult before it might be changed, so it can be restored later. + orig_rdmult = x->rdmult; + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + activity_masking(cpi, x); if (aq_mode == VARIANCE_AQ) { const int energy = bsize <= BLOCK_16X16 ? 
x->mb_energy : vp9_block_energy(cpi, x, bsize); - if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { @@ -1031,14 +1033,6 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, rdmult_ratio = vp9_vaq_rdmult_ratio(energy); vp9_init_plane_quantizers(cpi, x); - } - - // Save rdmult before it might be changed, so it can be restored later. - orig_rdmult = x->rdmult; - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - activity_masking(cpi, x); - - if (aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); x->rdmult = (int)round(x->rdmult * rdmult_ratio); } else if (aq_mode == COMPLEXITY_AQ) { @@ -1070,14 +1064,11 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, totaldist, bsize, ctx, best_rd); } - if (aq_mode == VARIANCE_AQ) { - x->rdmult = orig_rdmult; - if (*totalrate != INT_MAX) { - vp9_clear_system_state(); - *totalrate = (int)round(*totalrate * rdmult_ratio); - } - } else if (aq_mode == COMPLEXITY_AQ || aq_mode == CYCLIC_REFRESH_AQ) { - x->rdmult = orig_rdmult; + x->rdmult = orig_rdmult; + + if (aq_mode == VARIANCE_AQ && *totalrate != INT_MAX) { + vp9_clear_system_state(); + *totalrate = (int)round(*totalrate * rdmult_ratio); } } @@ -1364,6 +1355,25 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } +static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, + MODE_INFO **prev_mi_8x8) { + const int mis = cm->mi_stride; + int block_row, block_col; + + for (block_row = 0; block_row < 8; ++block_row) { + for (block_col = 0; block_col < 8; ++block_col) { + MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; + const BLOCK_SIZE sb_type = prev_mi ? 
prev_mi->mbmi.sb_type : 0; + + if (prev_mi) { + const ptrdiff_t offset = prev_mi - cm->prev_mi; + mi_8x8[block_row * mis + block_col] = cm->mi + offset; + mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type; + } + } + } +} + static void constrain_copy_partitioning(VP9_COMP *const cpi, const TileInfo *const tile, MODE_INFO **mi_8x8, @@ -1413,38 +1423,10 @@ static void constrain_copy_partitioning(VP9_COMP *const cpi, } } else { // Else this is a partial SB64, copy previous partition. - for (block_row = 0; block_row < 8; ++block_row) { - for (block_col = 0; block_col < 8; ++block_col) { - MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; - const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0; - if (prev_mi) { - const ptrdiff_t offset = prev_mi - cm->prev_mi; - mi_8x8[block_row * mis + block_col] = cm->mi + offset; - mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type; - } - } - } + copy_partitioning(cm, mi_8x8, prev_mi_8x8); } } -static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8, - MODE_INFO **prev_mi_8x8) { - const int mis = cm->mi_stride; - int block_row, block_col; - - for (block_row = 0; block_row < 8; ++block_row) { - for (block_col = 0; block_col < 8; ++block_col) { - MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col]; - const BLOCK_SIZE sb_type = prev_mi ? 
prev_mi->mbmi.sb_type : 0; - - if (prev_mi) { - const ptrdiff_t offset = prev_mi - cm->prev_mi; - mi_8x8[block_row * mis + block_col] = cm->mi + offset; - mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type; - } - } - } -} const struct { int row; @@ -1465,13 +1447,14 @@ static void set_source_var_based_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *x = &cpi->mb; + MACROBLOCK *const x = &cpi->mb; const int mis = cm->mi_stride; - int row8x8_remaining = tile->mi_row_end - mi_row; - int col8x8_remaining = tile->mi_col_end - mi_col; - int r, c; + const int row8x8_remaining = tile->mi_row_end - mi_row; + const int col8x8_remaining = tile->mi_col_end - mi_col; MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; + vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); + assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); // In-image SB64 @@ -1483,8 +1466,8 @@ static void set_source_var_based_partition(VP9_COMP *cpi, const int pre_offset = (mi_row * MI_SIZE) * pre_stride + (mi_col * MI_SIZE); const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset; - const int thr_32x32 = cpi->sf.source_var_thresh; - const int thr_64x64 = thr_32x32 << 1; + const unsigned int thr_32x32 = cpi->sf.source_var_thresh; + const unsigned int thr_64x64 = thr_32x32 << 1; int i, j; int index; diff d32[4]; @@ -1550,16 +1533,13 @@ static void set_source_var_based_partition(VP9_COMP *cpi, BLOCK_SIZE bsize = BLOCK_16X16; int bh = num_8x8_blocks_high_lookup[bsize]; int bw = num_8x8_blocks_wide_lookup[bsize]; - + int r, c; for (r = 0; r < MI_BLOCK_SIZE; r += bh) { for (c = 0; c < MI_BLOCK_SIZE; c += bw) { - int index = r * mis + c; - // Find a partition size that fits - bsize = find_partition_size(bsize, - (row8x8_remaining - r), - (col8x8_remaining - c), &bh, &bw); + const int index = r * mis + c; mi_8x8[index] = mi_upper_left + index; - mi_8x8[index]->mbmi.sb_type = bsize; + mi_8x8[index]->mbmi.sb_type 
= find_partition_size(bsize, + row8x8_remaining - r, col8x8_remaining - c, &bh, &bw); } } } @@ -2234,7 +2214,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, if (cpi->sf.disable_split_var_thresh && partition_none_allowed) { unsigned int source_variancey; vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); - source_variancey = get_sby_perpixel_variance(cpi, x, bsize); + source_variancey = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); if (source_variancey < cpi->sf.disable_split_var_thresh) { do_split = 0; if (source_variancey < cpi->sf.disable_split_var_thresh / 2) @@ -2668,9 +2648,10 @@ static TX_MODE select_tx_mode(const VP9_COMP *cpi) { if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { return ALLOW_32X32; } else if (cpi->sf.tx_size_search_method == USE_FULL_RD) { + const RD_OPT *const rd_opt = &cpi->rd; const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); - return cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] > - cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ? + return rd_opt->tx_select_threshes[frame_type][ALLOW_32X32] > + rd_opt->tx_select_threshes[frame_type][TX_MODE_SELECT] ? 
ALLOW_32X32 : TX_MODE_SELECT; } else { unsigned int total = 0; @@ -3201,7 +3182,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, 1, &dummy_rate, &dummy_dist); break; case SOURCE_VAR_BASED_PARTITION: - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col); nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rate, &dummy_dist); @@ -3232,8 +3212,23 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, } // end RTC play code +static int get_skip_encode_frame(const VP9_COMMON *cm) { + unsigned int intra_count = 0, inter_count = 0; + int j; + + for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { + intra_count += cm->counts.intra_inter[j][0]; + inter_count += cm->counts.intra_inter[j][1]; + } + + return (intra_count << 2) < inter_count && + cm->frame_type != KEY_FRAME && + cm->show_frame; +} + static void encode_frame_internal(VP9_COMP *cpi) { SPEED_FEATURES *const sf = &cpi->sf; + RD_OPT *const rd_opt = &cpi->rd; MACROBLOCK *const x = &cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; @@ -3244,10 +3239,10 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_zero(cm->counts); vp9_zero(cpi->coef_counts); vp9_zero(cpi->tx_stepdown_count); - vp9_zero(cpi->rd_comp_pred_diff); - vp9_zero(cpi->rd_filter_diff); - vp9_zero(cpi->rd_tx_select_diff); - vp9_zero(cpi->rd_tx_select_threshes); + vp9_zero(rd_opt->comp_pred_diff); + vp9_zero(rd_opt->filter_diff); + vp9_zero(rd_opt->tx_select_diff); + vp9_zero(rd_opt->tx_select_threshes); cm->tx_mode = select_tx_mode(cpi); @@ -3266,7 +3261,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { if (cpi->oxcf.tuning == VP8_TUNE_SSIM) build_activity_map(cpi); - cm->prev_mi = get_prev_mi(cm); + set_prev_mi(cm); if (sf->use_nonrd_pick_mode) { // Initialize internal buffer pointers for rtc coding, where non-RD @@ -3284,9 +3279,9 @@ static void 
encode_frame_internal(VP9_COMP *cpi) { } vp9_zero(x->zcoeff_blk); - if (cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION && + if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION && cm->current_video_frame > 0) { - int check_freq = cpi->sf.search_type_check_frequency; + int check_freq = sf->search_type_check_frequency; if ((cm->current_video_frame - 1) % check_freq == 0) { cpi->use_large_partition_rate = 0; @@ -3303,7 +3298,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { if ((cm->current_video_frame - 1) % check_freq >= 1) { if (cpi->use_large_partition_rate < 15) - cpi->sf.partition_search_type = FIXED_PARTITION; + sf->partition_search_type = FIXED_PARTITION; } } } @@ -3344,19 +3339,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); } - if (sf->skip_encode_sb) { - int j; - unsigned int intra_count = 0, inter_count = 0; - for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { - intra_count += cm->counts.intra_inter[j][0]; - inter_count += cm->counts.intra_inter[j][1]; - } - sf->skip_encode_frame = (intra_count << 2) < inter_count && - cm->frame_type != KEY_FRAME && - cm->show_frame; - } else { - sf->skip_encode_frame = 0; - } + sf->skip_encode_frame = sf->skip_encode_sb ? get_skip_encode_frame(cm) : 0; #if 0 // Keep record of the total distortion this time around for future use @@ -3366,6 +3349,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; // In the longer term the encoder should be generalized to match the // decoder such that we allow compound where one of the 3 buffers has a @@ -3398,8 +3382,8 @@ void vp9_encode_frame(VP9_COMP *cpi) { // that for subsequent frames. // It does the same analysis for transform size selection also. 
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); - const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type]; - const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type]; + const int64_t *mode_thresh = rd_opt->prediction_type_threshes[frame_type]; + const int64_t *filter_thresh = rd_opt->filter_threshes[frame_type]; /* prediction (compound, single or hybrid) mode selection */ if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter) @@ -3432,25 +3416,25 @@ void vp9_encode_frame(VP9_COMP *cpi) { encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { - const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs); - cpi->rd_prediction_type_threshes[frame_type][i] += diff; - cpi->rd_prediction_type_threshes[frame_type][i] >>= 1; + const int diff = (int) (rd_opt->comp_pred_diff[i] / cm->MBs); + rd_opt->prediction_type_threshes[frame_type][i] += diff; + rd_opt->prediction_type_threshes[frame_type][i] >>= 1; } for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { - const int64_t diff = cpi->rd_filter_diff[i] / cm->MBs; - cpi->rd_filter_threshes[frame_type][i] = - (cpi->rd_filter_threshes[frame_type][i] + diff) / 2; + const int64_t diff = rd_opt->filter_diff[i] / cm->MBs; + rd_opt->filter_threshes[frame_type][i] = + (rd_opt->filter_threshes[frame_type][i] + diff) / 2; } for (i = 0; i < TX_MODES; ++i) { - int64_t pd = cpi->rd_tx_select_diff[i]; + int64_t pd = rd_opt->tx_select_diff[i]; int diff; if (i == TX_MODE_SELECT) pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZES - 1), 0); diff = (int) (pd / cm->MBs); - cpi->rd_tx_select_threshes[frame_type][i] += diff; - cpi->rd_tx_select_threshes[frame_type][i] /= 2; + rd_opt->tx_select_threshes[frame_type][i] += diff; + rd_opt->tx_select_threshes[frame_type][i] /= 2; } if (cm->reference_mode == REFERENCE_MODE_SELECT) { diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 5e98e4e3f..b0c014eef 100644 --- a/vp9/encoder/vp9_encodemb.c +++ 
b/vp9/encoder/vp9_encodemb.c @@ -63,24 +63,17 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { } #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF) -typedef struct vp9_token_state vp9_token_state; -struct vp9_token_state { +typedef struct vp9_token_state { int rate; int error; int next; signed char token; short qc; -}; +} vp9_token_state; // TODO(jimbankoski): experiment to find optimal RD numbers. -#define Y1_RD_MULT 4 -#define UV_RD_MULT 2 - -static const int plane_rd_mult[4] = { - Y1_RD_MULT, - UV_RD_MULT, -}; +static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 }; #define UPDATE_RD_COST()\ {\ @@ -105,60 +98,56 @@ static int trellis_get_coeff_context(const int16_t *scan, return pt; } -static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, MACROBLOCK *mb, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { +static int optimize_b(MACROBLOCK *mb, int plane, int block, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx) { MACROBLOCKD *const xd = &mb->e_mbd; - struct macroblock_plane *p = &mb->plane[plane]; - struct macroblockd_plane *pd = &xd->plane[plane]; + struct macroblock_plane *const p = &mb->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; const int ref = is_inter_block(&xd->mi[0]->mbmi); vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; - const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); - int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - int eob = p->eobs[block], final_eob, sz = 0; - const int i0 = 0; - int rc, x, next, i; - int64_t rdmult, rddiv, rd_cost0, rd_cost1; - int rate0, rate1, error0, error1, t0, t1; - int best, band, pt; - PLANE_TYPE type = pd->plane_type; - int err_mult = plane_rd_mult[type]; + uint8_t token_cache[1024]; + const int16_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block); + int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); + int16_t *const dqcoeff = 
BLOCK_OFFSET(pd->dqcoeff, block); + const int eob = p->eobs[block]; + const PLANE_TYPE type = pd->plane_type; const int default_eob = 16 << (tx_size << 1); const int mul = 1 + (tx_size == TX_32X32); - uint8_t token_cache[1024]; const int16_t *dequant_ptr = pd->dequant; const uint8_t *const band_translate = get_band_translate(tx_size); - const scan_order *so = get_scan(xd, tx_size, type, block); - const int16_t *scan = so->scan; - const int16_t *nb = so->neighbors; + const scan_order *const so = get_scan(xd, tx_size, type, block); + const int16_t *const scan = so->scan; + const int16_t *const nb = so->neighbors; + int next = eob, sz = 0; + int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv; + int64_t rd_cost0, rd_cost1; + int rate0, rate1, error0, error1, t0, t1; + int best, band, pt, i, final_eob; assert((!type && !plane) || (type && plane)); assert(eob <= default_eob); /* Now set up a Viterbi trellis to evaluate alternative roundings. */ - rdmult = mb->rdmult * err_mult; - if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi)) + if (!ref) rdmult = (rdmult * 9) >> 4; - rddiv = mb->rddiv; + /* Initialize the sentinel node of the trellis. */ tokens[eob][0].rate = 0; tokens[eob][0].error = 0; tokens[eob][0].next = default_eob; tokens[eob][0].token = EOB_TOKEN; tokens[eob][0].qc = 0; - *(tokens[eob] + 1) = *(tokens[eob] + 0); - next = eob; + tokens[eob][1] = tokens[eob][0]; + for (i = 0; i < eob; i++) - token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ - qcoeff[scan[i]]].token]; + token_cache[scan[i]] = + vp9_pt_energy_class[vp9_dct_value_tokens_ptr[qcoeff[scan[i]]].token]; - for (i = eob; i-- > i0;) { + for (i = eob; i-- > 0;) { int base_bits, d2, dx; - - rc = scan[i]; - x = qcoeff[rc]; + const int rc = scan[i]; + int x = qcoeff[rc]; /* Only add a trellis state for non-zero coefficients. 
*/ if (x) { int shortcut = 0; @@ -172,17 +161,15 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, if (next < default_eob) { band = band_translate[i + 1]; pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); - rate0 += - mb->token_costs[tx_size][type][ref][band][0][pt] - [tokens[next][0].token]; - rate1 += - mb->token_costs[tx_size][type][ref][band][0][pt] - [tokens[next][1].token]; + rate0 += mb->token_costs[tx_size][type][ref][band][0][pt] + [tokens[next][0].token]; + rate1 += mb->token_costs[tx_size][type][ref][band][0][pt] + [tokens[next][1].token]; } UPDATE_RD_COST(); /* And pick the best. */ best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); + base_bits = vp9_dct_value_cost_ptr[x]; dx = mul * (dqcoeff[rc] - coeff[rc]); d2 = dx * dx; tokens[i][0].rate = base_bits + (best ? rate1 : rate0); @@ -196,9 +183,9 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; - if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) && - (abs(x)*dequant_ptr[rc != 0] < abs(coeff[rc]) * mul + - dequant_ptr[rc != 0])) + if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) && + (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul + + dequant_ptr[rc != 0])) shortcut = 1; else shortcut = 0; @@ -235,7 +222,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, UPDATE_RD_COST(); /* And pick the best. */ best = rd_cost1 < rd_cost0; - base_bits = *(vp9_dct_value_cost_ptr + x); + base_bits = vp9_dct_value_cost_ptr[x]; if (shortcut) { dx -= (dequant_ptr[rc != 0] + sz) ^ sz; @@ -274,26 +261,26 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, /* Now pick the best path through the whole trellis. 
*/ band = band_translate[i + 1]; - pt = combine_entropy_contexts(*a, *l); rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; error0 = tokens[next][0].error; error1 = tokens[next][1].error; t0 = tokens[next][0].token; t1 = tokens[next][1].token; - rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0]; - rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1]; + rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0]; + rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1]; UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; - final_eob = i0 - 1; + final_eob = -1; vpx_memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2))); vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2))); for (i = next; i < eob; i = next) { - x = tokens[i][best].qc; + const int x = tokens[i][best].qc; + const int rc = scan[i]; if (x) { final_eob = i; } - rc = scan[i]; + qcoeff[rc] = x; dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul; @@ -303,7 +290,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize, final_eob++; mb->plane[plane].eobs[block] = final_eob; - *a = *l = (final_eob > 0); + return final_eob; } static INLINE void fdct32x32(int rd_transform, @@ -393,7 +380,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, vp9_xform_quant(x, plane, block, plane_bsize, tx_size); if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { - optimize_b(plane, block, plane_bsize, tx_size, x, a, l); + const int ctx = combine_entropy_contexts(*a, *l); + *a = *l = optimize_b(x, plane, block, plane_bsize, tx_size, ctx) > 0; } else { *a = *l = p->eobs[block] > 0; } diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index c51f43fa8..874767de7 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -376,15 +376,12 @@ static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { } } -static unsigned int zz_motion_search(const MACROBLOCK *x) { - const MACROBLOCKD *const xd = 
&x->e_mbd; - const uint8_t *const src = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - const uint8_t *const ref = xd->plane[0].pre[0].buf; - const int ref_stride = xd->plane[0].pre[0].stride; +static unsigned int get_prediction_error(BLOCK_SIZE bsize, + const struct buf_2d *src, + const struct buf_2d *ref) { unsigned int sse; - vp9_variance_fn_t fn = get_block_variance_fn(xd->mi[0]->mbmi.sb_type); - fn(src, src_stride, ref, ref_stride, &sse); + const vp9_variance_fn_t fn = get_block_variance_fn(bsize); + fn(src->buf, src->stride, ref->buf, ref->stride, &sse); return sse; } @@ -416,9 +413,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // Center the initial step/diamond search on best mv. tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv, step_param, - x->sadperbit16, &num00, &v_fn_ptr, - x->nmvjointcost, - x->mvcost, ref_mv); + x->sadperbit16, &num00, &v_fn_ptr, ref_mv); if (tmp_err < INT_MAX) tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); if (tmp_err < INT_MAX - new_mv_mode_penalty) @@ -442,9 +437,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } else { tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv, step_param + n, x->sadperbit16, - &num00, &v_fn_ptr, - x->nmvjointcost, - x->mvcost, ref_mv); + &num00, &v_fn_ptr, ref_mv); if (tmp_err < INT_MAX) tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); if (tmp_err < INT_MAX - new_mv_mode_penalty) @@ -632,7 +625,8 @@ void vp9_first_pass(VP9_COMP *cpi) { int_mv mv, tmp_mv; xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; - motion_error = zz_motion_search(x); + motion_error = get_prediction_error(bsize, &x->plane[0].src, + &xd->plane[0].pre[0]); // Assume 0,0 motion with no mv overhead. 
mv.as_int = tmp_mv.as_int = 0; @@ -668,7 +662,8 @@ void vp9_first_pass(VP9_COMP *cpi) { int gf_motion_error; xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset; - gf_motion_error = zz_motion_search(x); + gf_motion_error = get_prediction_error(bsize, &x->plane[0].src, + &xd->plane[0].pre[0]); first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, &gf_motion_error); @@ -980,8 +975,6 @@ void vp9_init_second_pass(VP9_COMP *cpi) { oxcf->target_bandwidth / 10000000.0); } - cpi->output_framerate = oxcf->framerate; - // Calculate a minimum intra value to be used in determining the IIratio // scores used in the second pass. We have this minimum to make sure // that clips that are static but "low complexity" in the intra domain @@ -2186,7 +2179,7 @@ void vp9_rc_get_first_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || - (cm->frame_flags & FRAMEFLAGS_KEY))) { + (cpi->frame_flags & FRAMEFLAGS_KEY))) { cm->frame_type = KEY_FRAME; } else { cm->frame_type = INTER_FRAME; @@ -2256,7 +2249,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Keyframe and section processing. if (rc->frames_to_key == 0 || - (cm->frame_flags & FRAMEFLAGS_KEY)) { + (cpi->frame_flags & FRAMEFLAGS_KEY)) { // Define next KF group and assign bits to it. 
this_frame_copy = this_frame; find_next_key_frame(cpi, &this_frame_copy); diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 44b171fd1..a9da7283a 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -77,12 +77,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, INT_MAX); } -static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv, +static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, int_mv *dst_mv, int mb_row, int mb_col) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err, tmp_err; - int_mv tmp_mv; + MV tmp_mv; // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction @@ -93,24 +93,22 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv, // Test last reference frame using the previous best mv as the // starting point (best reference) for the search - tmp_err = do_16x16_motion_iteration(cpi, &ref_mv->as_mv, &tmp_mv.as_mv, - mb_row, mb_col); + tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col); if (tmp_err < err) { err = tmp_err; - dst_mv->as_int = tmp_mv.as_int; + dst_mv->as_mv = tmp_mv; } // If the current best reference mv is not centered on 0,0 then do a 0,0 // based search as well. 
- if (ref_mv->as_int) { + if (ref_mv->row != 0 || ref_mv->col != 0) { unsigned int tmp_err; - int_mv zero_ref_mv, tmp_mv; + MV zero_ref_mv = {0, 0}, tmp_mv; - zero_ref_mv.as_int = 0; - tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv.as_mv, &tmp_mv.as_mv, + tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv, mb_row, mb_col); if (tmp_err < err) { - dst_mv->as_int = tmp_mv.as_int; + dst_mv->as_mv = tmp_mv; err = tmp_err; } } @@ -173,7 +171,7 @@ static void update_mbgraph_mb_stats YV12_BUFFER_CONFIG *buf, int mb_y_offset, YV12_BUFFER_CONFIG *golden_ref, - int_mv *prev_golden_ref_mv, + const MV *prev_golden_ref_mv, YV12_BUFFER_CONFIG *alt_ref, int mb_row, int mb_col @@ -239,13 +237,11 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, int mb_col, mb_row, offset = 0; int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0; - int_mv arf_top_mv, gld_top_mv; + MV arf_top_mv = {0, 0}, gld_top_mv = {0, 0}; MODE_INFO mi_local = { { 0 } }; // Set up limit values for motion vectors to prevent them extending outside // the UMV borders. - arf_top_mv.as_int = 0; - gld_top_mv.as_int = 0; x->mv_row_min = -BORDER_MV_PIXELS_B16; x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16; xd->up_available = 0; @@ -258,15 +254,13 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, mi_local.mbmi.ref_frame[1] = NONE; for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - int_mv arf_left_mv, gld_left_mv; + MV arf_left_mv = arf_top_mv, gld_left_mv = gld_top_mv; int mb_y_in_offset = mb_y_offset; int arf_y_in_offset = arf_y_offset; int gld_y_in_offset = gld_y_offset; // Set up limit values for motion vectors to prevent them extending outside // the UMV borders. 
- arf_left_mv.as_int = arf_top_mv.as_int; - gld_left_mv.as_int = gld_top_mv.as_int; x->mv_col_min = -BORDER_MV_PIXELS_B16; x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16; xd->left_available = 0; @@ -277,11 +271,11 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref, &gld_left_mv, alt_ref, mb_row, mb_col); - arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int; - gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int; + arf_left_mv = mb_stats->ref[ALTREF_FRAME].m.mv.as_mv; + gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv; if (mb_col == 0) { - arf_top_mv.as_int = arf_left_mv.as_int; - gld_top_mv.as_int = gld_left_mv.as_int; + arf_top_mv = arf_left_mv; + gld_top_mv = gld_left_mv; } xd->left_available = 1; mb_y_in_offset += 16; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index f7a02a4a7..8a7901172 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -66,7 +66,7 @@ int vp9_init_search_range(VP9_COMP *cpi, int size) { } static INLINE int mv_cost(const MV *mv, - const int *joint_cost, int *comp_cost[2]) { + const int *joint_cost, int *const comp_cost[2]) { return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] + comp_cost[1][mv->col]; } @@ -90,14 +90,13 @@ static int mv_err_cost(const MV *mv, const MV *ref, return 0; } -static int mvsad_err_cost(const MV *mv, const MV *ref, - const int *mvjsadcost, int *mvsadcost[2], +static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, int error_per_bit) { - if (mvsadcost) { + if (x->nmvsadcost) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; - return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * - error_per_bit, 8); + return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost, + x->nmvsadcost) * error_per_bit, 8); } return 0; } @@ -170,14 +169,13 @@ static INLINE int sp(int x) { return (x & 7) << 1; } -static INLINE const uint8_t 
*pre(const uint8_t *buf, int stride, int r, int c, - int offset) { - return &buf[(r >> 3) * stride + (c >> 3) - offset]; +static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { + return &buf[(r >> 3) * stride + (c >> 3)]; } /* returns subpixel variance error function */ #define DIST(r, c) \ - vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), z, \ + vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ src_stride, &sse) /* checks if (r, c) has better score than previous best */ @@ -270,7 +268,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1) { - const uint8_t *z = x->plane[0].src.buf; + const uint8_t *const z = x->plane[0].src.buf; const int src_stride = x->plane[0].src.stride; const MACROBLOCKD *xd = &x->e_mbd; unsigned int besterr = INT_MAX; @@ -283,7 +281,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, const int y_stride = xd->plane[0].pre[0].stride; const int offset = bestmv->row * y_stride + bestmv->col; - const uint8_t *y = xd->plane[0].pre[0].buf + offset; + const uint8_t *const y = xd->plane[0].pre[0].buf; int rr = ref_mv->row; int rc = ref_mv->col; @@ -303,7 +301,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, bestmv->col *= 8; // calculate central point error - besterr = vfp->vf(y, y_stride, z, src_stride, sse1); + besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -353,7 +351,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, #undef DIST /* returns subpixel variance error function */ #define DIST(r, c) \ - vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), \ + vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ z, src_stride, &sse, second_pred) int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, @@ -368,7 +366,7 @@ int vp9_find_best_sub_pixel_comp_tree(const 
MACROBLOCK *x, unsigned int *sse1, const uint8_t *second_pred, int w, int h) { - const uint8_t *z = x->plane[0].src.buf; + const uint8_t *const z = x->plane[0].src.buf; const int src_stride = x->plane[0].src.stride; const MACROBLOCKD *xd = &x->e_mbd; unsigned int besterr = INT_MAX; @@ -382,7 +380,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); const int y_stride = xd->plane[0].pre[0].stride; const int offset = bestmv->row * y_stride + bestmv->col; - const uint8_t *y = xd->plane[0].pre[0].buf + offset; + const uint8_t *const y = xd->plane[0].pre[0].buf; int rr = ref_mv->row; int rc = ref_mv->col; @@ -404,7 +402,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, // calculate central point error // TODO(yunqingwang): central pointer error was already calculated in full- // pixel search, and can be passed in this function. - vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride); + vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -479,8 +477,7 @@ static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { {\ if (thissad < bestsad) {\ if (use_mvcost) \ - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \ - mvjsadcost, mvsadcost, sad_per_bit);\ + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\ if (thissad < bestsad) {\ bestsad = thissad;\ best_site = i;\ @@ -520,9 +517,6 @@ static int vp9_pattern_search(const MACROBLOCK *x, int k = -1; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_init_s = search_param_to_steps[search_param]; - const int *const mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - // adjust ref_mv to make sure it is within MV range clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, 
x->mv_row_max); br = ref_mv->row; @@ -531,8 +525,8 @@ static int vp9_pattern_search(const MACROBLOCK *x, // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride, - 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + 0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv, + sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -880,7 +874,6 @@ int vp9_fast_dia_search(const MACROBLOCK *x, int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const uint8_t *what = x->plane[0].src.buf; @@ -893,10 +886,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, unsigned int thissad; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - int tr, tc; int best_tr = 0; int best_tc = 0; @@ -918,8 +907,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); start_row = MAX(-range, x->mv_row_min - ref_row); start_col = MAX(-range, x->mv_col_min - ref_col); @@ -940,8 +928,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, if (sad_array[i] < bestsad) { const MV this_mv = {ref_row + tr, ref_col + tc + i}; thissad = sad_array[i] + - mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + mvsad_err_cost(x, 
&this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_tr = tr; @@ -957,8 +944,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, if (thissad < bestsad) { const MV this_mv = {ref_row + tr, ref_col + tc + i}; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -979,7 +965,6 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; @@ -991,23 +976,22 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site *const ss = &x->ss[search_param * x->searches_per_step]; const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - const uint8_t *best_address; + const uint8_t *best_address, *in_what_ref; int best_sad = INT_MAX; int best_site = 0; int last_site = 0; int i, j, step; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - best_address = get_buf_from_mv(in_what, ref_mv); + in_what_ref = get_buf_from_mv(in_what, ref_mv); + best_address = in_what_ref; *num00 = 0; *best_mv = *ref_mv; // Check the starting position best_sad = fn_ptr->sdf(what->buf, what->stride, - in_what->buf, in_what->stride, 0x7fffffff) + - mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); + best_address, in_what->stride, 0x7fffffff) + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -1020,8 +1004,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, best_address + ss[i].offset, 
in_what->stride, best_sad); if (sad < best_sad) { - sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; best_site = i; @@ -1046,8 +1029,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, best_address + ss[best_site].offset, in_what->stride, best_sad); if (sad < best_sad) { - sad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; best_mv->row += ss[best_site].mv.row; @@ -1060,7 +1042,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, break; }; #endif - } else if (best_address == in_what->buf) { + } else if (best_address == in_what_ref) { (*num00)++; } } @@ -1071,7 +1053,6 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv) { int i, j, step; @@ -1098,10 +1079,6 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, const int tot_steps = (x->ss_count / x->searches_per_step) - search_param; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->row; ref_col = ref_mv->col; @@ -1115,8 +1092,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 1; @@ -1149,9 +1125,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, if (sad_array[t] < bestsad) { const MV this_mv = {best_mv->row + ss[i].mv.row, 
best_mv->col + ss[i].mv.col}; - sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv, + sad_per_bit); if (sad_array[t] < bestsad) { bestsad = sad_array[t]; best_site = i; @@ -1171,9 +1146,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x, in_what_stride, bestsad); if (thissad < bestsad) { - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = i; @@ -1231,8 +1204,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, int thissme, n, num00 = 0; int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, step_param, sadpb, &n, - fn_ptr, x->nmvjointcost, - x->mvcost, ref_mv); + fn_ptr, ref_mv); if (bestsme < INT_MAX) bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); *dst_mv = temp_mv; @@ -1250,8 +1222,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, } else { thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv, step_param + n, sadpb, &num00, - fn_ptr, x->nmvjointcost, x->mvcost, - ref_mv); + fn_ptr, ref_mv); if (thissme < INT_MAX) thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); @@ -1271,8 +1242,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, const int search_range = 8; MV best_mv = *dst_mv; thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range, - fn_ptr, x->nmvjointcost, x->mvcost, - ref_mv); + fn_ptr, ref_mv); if (thissme < INT_MAX) thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); if (thissme < bestsme) { @@ -1286,7 +1256,6 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv, MV *best_mv) { int r, c; const 
MACROBLOCKD *const xd = &x->e_mbd; @@ -1296,12 +1265,10 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const int row_max = MIN(ref_mv->row + distance, x->mv_row_max); const int col_min = MAX(ref_mv->col - distance, x->mv_col_min); const int col_max = MIN(ref_mv->col + distance, x->mv_col_max); - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit); + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); *best_mv = *ref_mv; for (r = row_min; r < row_max; ++r) { @@ -1309,9 +1276,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, const MV mv = {r, c}; const int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) + - mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); - + mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; *best_mv = mv; @@ -1324,7 +1289,6 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv, MV *best_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const uint8_t *const what = x->plane[0].src.buf; @@ -1346,8 +1310,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, const int col_max = MIN(ref_col + distance, x->mv_col_max); unsigned int sad_array[3]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; // Work out the mid point for the search const uint8_t *bestaddress = 
&in_what[ref_row * in_what_stride + ref_col]; @@ -1358,8 +1320,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); for (r = row_min; r < row_max; r++) { const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; @@ -1376,9 +1337,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1396,9 +1355,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1416,7 +1373,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv, int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv, MV *best_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const uint8_t *const what = x->plane[0].src.buf; @@ -1439,9 +1395,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, unsigned int sad_array[3]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - // Work out the mid point for the search const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col]; @@ -1451,8 +1404,7 @@ 
int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, // Baseline value at the center bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); for (r = row_min; r < row_max; r++) { const uint8_t *check_here = &in_what[r * in_what_stride + col_min]; @@ -1469,9 +1421,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1494,9 +1444,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1515,9 +1463,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv, if (thissad < bestsad) { this_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); - + thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row = r; @@ -1536,20 +1482,16 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv) { const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = 
x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; for (i = 0; i < search_range; i++) { @@ -1562,8 +1504,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); if (sad < best_sad) { - sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost, - error_per_bit); + sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { best_sad = sad; best_site = j; @@ -1586,19 +1527,16 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}}; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; for (i = 0; i < search_range; i++) { @@ -1623,9 +1561,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, if (sads[j] < best_sad) { const MV mv = {ref_mv->row + neighbors[j].row, ref_mv->col + neighbors[j].col}; - sads[j] += mvsad_err_cost(&mv, &fcenter_mv, - mvjsadcost, mvsadcost, 
error_per_bit); - + sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sads[j] < best_sad) { best_sad = sads[j]; best_site = j; @@ -1642,9 +1578,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x, get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); if (sad < best_sad) { - sad += mvsad_err_cost(&mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); - + sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { best_sad = sad; best_site = j; @@ -1672,7 +1606,6 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv, const uint8_t *second_pred, int w, int h) { const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}, @@ -1681,12 +1614,10 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - const int *mvjsadcost = x->nmvjointsadcost; - int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; for (i = 0; i < search_range; ++i) { @@ -1701,8 +1632,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, get_buf_from_mv(in_what, &mv), in_what->stride, second_pred, best_sad); if (sad < best_sad) { - sad += mvsad_err_cost(&mv, &fcenter_mv, - mvjsadcost, mvsadcost, error_per_bit); + sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { best_sad = sad; best_site = j; diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index f7b7c5e49..70d7985e4 100644 --- a/vp9/encoder/vp9_mcomp.h +++ 
b/vp9/encoder/vp9_mcomp.h @@ -110,14 +110,12 @@ typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv, MV *best_mv); typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x, MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv); typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x, @@ -125,14 +123,12 @@ typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv); int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, - int *mvjcost, int *mvcost[2], const MV *center_mv, const uint8_t *second_pred, int w, int h); #ifdef __cplusplus diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 0ac9d5f05..03f3c87a2 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -61,11 +61,6 @@ void vp9_coef_tree_initialize(); // now so that HIGH_PRECISION is always // chosen. -// Max rate target for 1080P and below encodes under normal circumstances -// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB -#define MAX_MB_RATE 250 -#define MAXRATE_1080P 2025000 - // #define OUTPUT_YUV_REC #ifdef OUTPUT_YUV_SRC @@ -386,117 +381,119 @@ static int is_slowest_mode(int mode) { static void set_rd_speed_thresholds(VP9_COMP *cpi) { int i; + RD_OPT *const rd = &cpi->rd; // Set baseline threshold values for (i = 0; i < MAX_MODES; ++i) - cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; - - cpi->rd_thresh_mult[THR_NEARESTMV] = 0; - cpi->rd_thresh_mult[THR_NEARESTG] = 0; - cpi->rd_thresh_mult[THR_NEARESTA] = 0; - - cpi->rd_thresh_mult[THR_DC] += 1000; - - cpi->rd_thresh_mult[THR_NEWMV] += 1000; - cpi->rd_thresh_mult[THR_NEWA] += 1000; - cpi->rd_thresh_mult[THR_NEWG] += 1000; - - cpi->rd_thresh_mult[THR_NEARMV] += 1000; - cpi->rd_thresh_mult[THR_NEARA] += 1000; - cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000; - cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000; - - cpi->rd_thresh_mult[THR_TM] += 1000; - - cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500; - cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000; - cpi->rd_thresh_mult[THR_NEARG] += 1000; - cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500; - cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000; - - cpi->rd_thresh_mult[THR_ZEROMV] += 2000; - cpi->rd_thresh_mult[THR_ZEROG] += 2000; - cpi->rd_thresh_mult[THR_ZEROA] += 2000; - cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500; - cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500; - - cpi->rd_thresh_mult[THR_H_PRED] += 2000; - cpi->rd_thresh_mult[THR_V_PRED] += 2000; - cpi->rd_thresh_mult[THR_D45_PRED ] += 2500; - cpi->rd_thresh_mult[THR_D135_PRED] += 2500; - cpi->rd_thresh_mult[THR_D117_PRED] += 2500; - cpi->rd_thresh_mult[THR_D153_PRED] += 2500; - cpi->rd_thresh_mult[THR_D207_PRED] += 2500; - cpi->rd_thresh_mult[THR_D63_PRED] += 2500; + rd->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; + + rd->thresh_mult[THR_NEARESTMV] = 0; + rd->thresh_mult[THR_NEARESTG] = 0; + rd->thresh_mult[THR_NEARESTA] = 0; + + rd->thresh_mult[THR_DC] += 1000; + + rd->thresh_mult[THR_NEWMV] += 1000; + rd->thresh_mult[THR_NEWA] += 1000; + rd->thresh_mult[THR_NEWG] += 1000; + + rd->thresh_mult[THR_NEARMV] += 1000; + rd->thresh_mult[THR_NEARA] += 1000; + rd->thresh_mult[THR_COMP_NEARESTLA] += 1000; + rd->thresh_mult[THR_COMP_NEARESTGA] += 1000; + + rd->thresh_mult[THR_TM] += 1000; + + rd->thresh_mult[THR_COMP_NEARLA] += 1500; + rd->thresh_mult[THR_COMP_NEWLA] += 2000; + rd->thresh_mult[THR_NEARG] += 1000; + rd->thresh_mult[THR_COMP_NEARGA] += 1500; + rd->thresh_mult[THR_COMP_NEWGA] += 2000; + + rd->thresh_mult[THR_ZEROMV] += 2000; + rd->thresh_mult[THR_ZEROG] += 2000; + rd->thresh_mult[THR_ZEROA] += 2000; + rd->thresh_mult[THR_COMP_ZEROLA] += 2500; + rd->thresh_mult[THR_COMP_ZEROGA] += 2500; + + rd->thresh_mult[THR_H_PRED] += 2000; + rd->thresh_mult[THR_V_PRED] += 2000; + rd->thresh_mult[THR_D45_PRED ] += 2500; + rd->thresh_mult[THR_D135_PRED] += 2500; + rd->thresh_mult[THR_D117_PRED] += 2500; + rd->thresh_mult[THR_D153_PRED] += 2500; + rd->thresh_mult[THR_D207_PRED] += 2500; + rd->thresh_mult[THR_D63_PRED] += 2500; /* disable frame modes if flags not set */ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { - cpi->rd_thresh_mult[THR_NEWMV ] = INT_MAX; - cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX; - cpi->rd_thresh_mult[THR_ZEROMV ] = INT_MAX; - cpi->rd_thresh_mult[THR_NEARMV ] = INT_MAX; + rd->thresh_mult[THR_NEWMV ] = INT_MAX; + rd->thresh_mult[THR_NEARESTMV] = INT_MAX; + rd->thresh_mult[THR_ZEROMV ] = INT_MAX; + rd->thresh_mult[THR_NEARMV ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { - cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX; - cpi->rd_thresh_mult[THR_ZEROG ] = INT_MAX; - cpi->rd_thresh_mult[THR_NEARG ] = INT_MAX; - cpi->rd_thresh_mult[THR_NEWG ] = INT_MAX; + rd->thresh_mult[THR_NEARESTG ] = INT_MAX; + rd->thresh_mult[THR_ZEROG ] = INT_MAX; + 
rd->thresh_mult[THR_NEARG ] = INT_MAX; + rd->thresh_mult[THR_NEWG ] = INT_MAX; } if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { - cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX; - cpi->rd_thresh_mult[THR_ZEROA ] = INT_MAX; - cpi->rd_thresh_mult[THR_NEARA ] = INT_MAX; - cpi->rd_thresh_mult[THR_NEWA ] = INT_MAX; + rd->thresh_mult[THR_NEARESTA ] = INT_MAX; + rd->thresh_mult[THR_ZEROA ] = INT_MAX; + rd->thresh_mult[THR_NEARA ] = INT_MAX; + rd->thresh_mult[THR_NEWA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) { - cpi->rd_thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; - cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; - cpi->rd_thresh_mult[THR_COMP_NEARLA ] = INT_MAX; - cpi->rd_thresh_mult[THR_COMP_NEWLA ] = INT_MAX; + rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; } if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { - cpi->rd_thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; - cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; - cpi->rd_thresh_mult[THR_COMP_NEARGA ] = INT_MAX; - cpi->rd_thresh_mult[THR_COMP_NEWGA ] = INT_MAX; + rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; } } static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { const SPEED_FEATURES *const sf = &cpi->sf; + RD_OPT *const rd = &cpi->rd; int i; for (i = 0; i < MAX_REFS; ++i) - cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; + rd->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? 
-500 : 0; - cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500; - cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500; - cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500; - cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500; - cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500; - cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500; + rd->thresh_mult_sub8x8[THR_LAST] += 2500; + rd->thresh_mult_sub8x8[THR_GOLD] += 2500; + rd->thresh_mult_sub8x8[THR_ALTR] += 2500; + rd->thresh_mult_sub8x8[THR_INTRA] += 2500; + rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500; + rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500; // Check for masked out split cases. for (i = 0; i < MAX_REFS; i++) if (sf->disable_split_mask & (1 << i)) - cpi->rd_thresh_mult_sub8x8[i] = INT_MAX; + rd->thresh_mult_sub8x8[i] = INT_MAX; // disable mode test if frame flag is not set if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) - cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX; + rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) - cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX; + rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) - cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX; + rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != (VP9_LAST_FLAG | VP9_ALT_FLAG)) - cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; + rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != (VP9_GOLD_FLAG | VP9_ALT_FLAG)) - cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; + rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; } static void set_speed_features(VP9_COMP *cpi) { @@ -627,50 +624,8 @@ static void update_frame_size(VP9_COMP *cpi) { } void vp9_new_framerate(VP9_COMP *cpi, double framerate) { - VP9_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - VP9_CONFIG *const oxcf = &cpi->oxcf; - int vbr_max_bits; - - oxcf->framerate = framerate < 0.1 ? 
30 : framerate; - cpi->output_framerate = cpi->oxcf.framerate; - rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / - cpi->output_framerate); - rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth * - oxcf->two_pass_vbrmin_section / 100); - - rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); - - // A maximum bitrate for a frame is defined. - // The baseline for this aligns with HW implementations that - // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits - // per 16x16 MB (averaged over a frame). However this limit is extended if - // a very high rate is given on the command line or the the rate cannnot - // be acheived because of a user specificed max q (e.g. when the user - // specifies lossless encode. - // - vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth * - oxcf->two_pass_vbrmax_section) / 100); - rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), - vbr_max_bits); - - // Set Maximum gf/arf interval - rc->max_gf_interval = 16; - - // Extended interval for genuinely static scenes - rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; - - // Special conditions when alt ref frame enabled in lagged compress mode - if (oxcf->play_alternate && oxcf->lag_in_frames) { - if (rc->max_gf_interval > oxcf->lag_in_frames - 1) - rc->max_gf_interval = oxcf->lag_in_frames - 1; - - if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; - } - - if (rc->max_gf_interval > rc->static_scene_max_gf_interval) - rc->max_gf_interval = rc->static_scene_max_gf_interval; + cpi->oxcf.framerate = framerate < 0.1 ? 
30 : framerate; + vp9_rc_update_framerate(cpi); } int64_t vp9_rescale(int64_t val, int64_t num, int denom) { @@ -860,10 +815,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { cm->display_width = cpi->oxcf.width; cm->display_height = cpi->oxcf.height; - // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) - cpi->oxcf.sharpness = MIN(7, cpi->oxcf.sharpness); - - cpi->common.lf.sharpness_level = cpi->oxcf.sharpness; + cm->lf.sharpness_level = cpi->oxcf.sharpness; if (cpi->initial_width) { // Increasing the size of the frame beyond the first seen frame, or some @@ -883,10 +835,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { cpi->speed = abs(cpi->oxcf.cpu_used); - // Limit on lag buffers as these are not currently dynamically allocated. - if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) - cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; - #if CONFIG_MULTIPLE_ARF vp9_zero(cpi->alt_ref_source); #else @@ -906,7 +854,9 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { cpi->ext_refresh_frame_context_pending = 0; } +#ifndef M_LOG2_E #define M_LOG2_E 0.693147180559945309417 +#endif #define log2f(x) (log (x) / (float) M_LOG2_E) static void cal_nmvjointsadcost(int *mvjointsadcost) { @@ -1275,9 +1225,9 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { // Default rd threshold factors for mode selection for (i = 0; i < BLOCK_SIZES; ++i) { for (j = 0; j < MAX_MODES; ++j) - cpi->rd_thresh_freq_fact[i][j] = 32; + cpi->rd.thresh_freq_fact[i][j] = 32; for (j = 0; j < MAX_REFS; ++j) - cpi->rd_thresh_freq_sub8x8[i][j] = 32; + cpi->rd.thresh_freq_sub8x8[i][j] = 32; } #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \ @@ -1386,8 +1336,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { cm->error.setjmp = 0; - vp9_zero(cpi->common.counts.uv_mode); - #ifdef MODE_TEST_HIT_STATS vp9_zero(cpi->mode_test_hits); #endif @@ -1787,7 +1735,9 @@ static void 
scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb, dsts[i], out_h_uv, out_w_uv, dst_strides[i]); } } - vp8_yv12_extend_frame_borders(dst_fb); + // TODO(hkuang): Call C version explicitly + // as neon version only expand border size 32. + vp8_yv12_extend_frame_borders_c(dst_fb); } static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, @@ -1828,7 +1778,9 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb, } } - vp8_yv12_extend_frame_borders(dst_fb); + // TODO(hkuang): Call C version explicitly + // as neon version only expand border size 32. + vp8_yv12_extend_frame_borders_c(dst_fb); } static int find_fp_qindex() { @@ -2538,7 +2490,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_clear_system_state(); - vp9_zero(cpi->rd_tx_select_threshes); + vp9_zero(cpi->rd.tx_select_threshes); #if CONFIG_VP9_POSTPROC if (cpi->oxcf.noise_sensitivity > 0) { @@ -2647,22 +2599,23 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, output_frame_level_debug_stats(cpi); #endif if (cpi->refresh_golden_frame == 1) - cm->frame_flags |= FRAMEFLAGS_GOLDEN; + cpi->frame_flags |= FRAMEFLAGS_GOLDEN; else - cm->frame_flags &= ~FRAMEFLAGS_GOLDEN; + cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN; if (cpi->refresh_alt_ref_frame == 1) - cm->frame_flags |= FRAMEFLAGS_ALTREF; + cpi->frame_flags |= FRAMEFLAGS_ALTREF; else - cm->frame_flags &= ~FRAMEFLAGS_ALTREF; + cpi->frame_flags &= ~FRAMEFLAGS_ALTREF; get_ref_frame_flags(cpi); + cm->last_frame_type = cm->frame_type; vp9_rc_postencode_update(cpi, *size); if (cm->frame_type == KEY_FRAME) { // Tell the caller that the frame was coded as a key frame - *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY; + *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY; #if CONFIG_MULTIPLE_ARF // Reset the sequence number. 
@@ -2673,7 +2626,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } #endif } else { - *frame_flags = cm->frame_flags&~FRAMEFLAGS_KEY; + *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY; #if CONFIG_MULTIPLE_ARF /* Increment position in the coded frame sequence. */ @@ -3042,7 +2995,16 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } #endif - cm->frame_flags = *frame_flags; + cpi->frame_flags = *frame_flags; + + if (cpi->pass == 2 && + cm->current_video_frame == 0 && + cpi->oxcf.allow_spatial_resampling && + cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { + // Internal scaling is triggered on the first frame. + vp9_set_size_literal(cpi, cpi->oxcf.scaled_frame_width, + cpi->oxcf.scaled_frame_height); + } // Reset the frame pointers to the current frame size vp9_realloc_frame_buffer(get_frame_new_buffer(cm), diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 18203f96a..7a110ac4c 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -185,6 +185,7 @@ typedef enum { AQ_MODE_COUNT // This should always be the last member of the enum } AQ_MODE; + typedef struct VP9_CONFIG { BITSTREAM_PROFILE profile; BIT_DEPTH bit_depth; @@ -231,6 +232,11 @@ typedef struct VP9_CONFIG { int lossless; AQ_MODE aq_mode; // Adaptive Quantization mode + // Internal frame size scaling. + int allow_spatial_resampling; + int scaled_frame_width; + int scaled_frame_height; + // Enable feature to reduce the frame quantization every x frames. int frame_periodic_boost; @@ -281,6 +287,35 @@ typedef struct VP9_CONFIG { vp8e_tuning tuning; } VP9_CONFIG; + +typedef struct RD_OPT { + // Thresh_mult is used to set a threshold for the rd score. A higher value + // means that we will accept the best mode so far more often. This number + // is used in combination with the current block size, and thresh_freq_fact + // to pick a threshold. 
+ int thresh_mult[MAX_MODES]; + int thresh_mult_sub8x8[MAX_REFS]; + + int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; + int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; + int thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; + int thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS]; + + int64_t comp_pred_diff[REFERENCE_MODES]; + int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; + int64_t tx_select_diff[TX_MODES]; + // FIXME(rbultje) can this overflow? + int tx_select_threshes[MAX_REF_FRAMES][TX_MODES]; + + int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; + int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; + int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; + int64_t mask_filter; + + int RDMULT; + int RDDIV; +} RD_OPT; + typedef struct VP9_COMP { QUANTS quants; MACROBLOCK mb; @@ -343,31 +378,7 @@ typedef struct VP9_COMP { // Ambient reconstruction err target for force key frames int ambient_err; - // Thresh_mult is used to set a threshold for the rd score. A higher value - // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. - int rd_thresh_mult[MAX_MODES]; - int rd_thresh_mult_sub8x8[MAX_REFS]; - - int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; - int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; - int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS]; - int rd_thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS]; - - int64_t rd_comp_pred_diff[REFERENCE_MODES]; - int64_t rd_prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; - int64_t rd_tx_select_diff[TX_MODES]; - // FIXME(rbultje) can this overflow? 
- int rd_tx_select_threshes[MAX_REF_FRAMES][TX_MODES]; - - int64_t rd_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; - int64_t rd_filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; - int64_t rd_filter_cache[SWITCHABLE_FILTER_CONTEXTS]; - int64_t mask_filter_rd; - - int RDMULT; - int RDDIV; + RD_OPT rd; CODING_CONTEXT coding_context; @@ -376,7 +387,6 @@ typedef struct VP9_COMP { int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames int active_arnr_strength; // <= cpi->oxcf.arnr_max_strength - double output_framerate; int64_t last_time_stamp_seen; int64_t last_end_time_stamp_seen; int64_t first_time_stamp_ever; @@ -498,6 +508,8 @@ typedef struct VP9_COMP { int use_large_partition_rate; + int frame_flags; + #if CONFIG_MULTIPLE_ARF // ARF tracking variables. int multi_arf_enabled; @@ -591,7 +603,7 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( // Intra only frames, golden frames (except alt ref overlays) and // alt ref frames tend to be coded at a higher than ambient quality -static INLINE int vp9_frame_is_boosted(const VP9_COMP *cpi) { +static INLINE int frame_is_boosted(const VP9_COMP *cpi) { return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); } diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 3ac85228b..e003a0f42 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -23,7 +23,7 @@ #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_quantize.h" -static int get_max_filter_level(VP9_COMP *cpi) { +static int get_max_filter_level(const VP9_COMP *cpi) { return cpi->twopass.section_intra_rating > 8 ? 
MAX_LOOP_FILTER * 3 / 4 : MAX_LOOP_FILTER; } @@ -43,15 +43,15 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, return filt_err; } -static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, - int partial_frame) { - VP9_COMMON *const cm = &cpi->common; - struct loopfilter *const lf = &cm->lf; +static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, + int partial_frame) { + const VP9_COMMON *const cm = &cpi->common; + const struct loopfilter *const lf = &cm->lf; const int min_filter_level = 0; const int max_filter_level = get_max_filter_level(cpi); - int best_err; - int filt_best; int filt_direction = 0; + int best_err, filt_best; + // Start the search at the previous frame filter level unless it is now out of // range. int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); @@ -128,7 +128,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, } } - lf->filter_level = filt_best; + return filt_best; } void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, @@ -150,6 +150,7 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, filt_guess -= 4; lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); } else { - search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE); + lf->filter_level = search_filter_level(sd, cpi, + method == LPF_PICK_FROM_SUBIMAGE); } } diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index f3fe99cdb..3d398edc9 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -233,8 +233,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const int64_t intra_mode_cost = 50; unsigned char segment_id = mbmi->segment_id; - const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; - const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; + const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize]; + 
const int *const rd_thresh_freq_fact = cpi->rd.thresh_freq_fact[bsize]; // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame. int mode_idx[MB_MODE_COUNT] = {0}; INTERP_FILTER filter_ref = SWITCHABLE; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 342081644..76ec84b5f 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -27,6 +27,11 @@ #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_ratectrl.h" +// Max rate target for 1080P and below encodes under normal circumstances +// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB +#define MAX_MB_RATE 250 +#define MAXRATE_1080P 2025000 + #define DEFAULT_KF_BOOST 2000 #define DEFAULT_GF_BOOST 2000 @@ -74,14 +79,13 @@ void vp9_rc_init_minq_luts() { for (i = 0; i < QINDEX_RANGE; i++) { const double maxq = vp9_convert_qindex_to_q(i); - kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15); kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50); gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32); gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33); afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55); - inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75); + inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.55); } } @@ -367,8 +371,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, // Calculate required scaling factor based on target frame size and size of // frame produced using previous Q. 
- target_bits_per_mb = - ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; + target_bits_per_mb = + ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; i = active_best_quality; @@ -565,11 +569,18 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY // Limit Q range for the adaptive loop. - if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) { - if (!(cm->current_video_frame == 0)) - *top_index = (active_worst_quality + active_best_quality * 3) / 4; + if (cm->frame_type == KEY_FRAME && + !rc->this_key_frame_forced && + !(cm->current_video_frame == 0)) { + int qdelta = 0; + vp9_clear_system_state(); + qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, + active_worst_quality, 2.0); + *top_index = active_worst_quality + qdelta; + *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index; } #endif + // Special case code to try and match quality with forced key frames if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) { q = rc->last_boosted_qindex; @@ -725,15 +736,26 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, *bottom_index = active_best_quality; #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY - // Limit Q range for the adaptive loop. - if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) { - if (!(cm->current_video_frame == 0)) - *top_index = (active_worst_quality + active_best_quality * 3) / 4; - } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - *top_index = (active_worst_quality + active_best_quality) / 2; + { + int qdelta = 0; + vp9_clear_system_state(); + + // Limit Q range for the adaptive loop. 
+ if (cm->frame_type == KEY_FRAME && + !rc->this_key_frame_forced && + !(cm->current_video_frame == 0)) { + qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, + active_worst_quality, 2.0); + } else if (!rc->is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, + active_worst_quality, 1.75); + } + *top_index = active_worst_quality + qdelta; + *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index; } #endif + if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { q = active_best_quality; // Special case code to try and match quality with forced key frames @@ -907,13 +929,22 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, *bottom_index = active_best_quality; #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY - // Limit Q range for the adaptive loop. - if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) { - *top_index = (active_worst_quality + active_best_quality * 3) / 4; - } else if (!rc->is_src_frame_alt_ref && - (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - *top_index = (active_worst_quality + active_best_quality) / 2; + { + int qdelta = 0; + vp9_clear_system_state(); + + // Limit Q range for the adaptive loop. + if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) { + qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, + active_worst_quality, 2.0); + } else if (!rc->is_src_frame_alt_ref && + (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, + active_worst_quality, 1.75); + } + *top_index = active_worst_quality + qdelta; + *top_index = (*top_index > *bottom_index) ? 
*top_index : *bottom_index; } #endif @@ -1065,11 +1096,11 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { } void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { - VP9_COMMON *const cm = &cpi->common; + const VP9_COMMON *const cm = &cpi->common; const VP9_CONFIG *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; + const int qindex = cm->base_qindex; - cm->last_frame_type = cm->frame_type; // Update rate control heuristics rc->projected_frame_size = (int)(bytes_used << 3); @@ -1080,25 +1111,24 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Keep a record of last Q and ambient average Q. if (cm->frame_type == KEY_FRAME) { - rc->last_q[KEY_FRAME] = cm->base_qindex; - rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO( - 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); + rc->last_q[KEY_FRAME] = qindex; + rc->avg_frame_qindex[KEY_FRAME] = + ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2); } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && - !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) { - rc->last_q[2] = cm->base_qindex; - rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( - 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && + !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) { + rc->last_q[2] = qindex; + rc->avg_frame_qindex[2] = + ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2); } else { - rc->last_q[INTER_FRAME] = cm->base_qindex; - rc->avg_frame_qindex[INTER_FRAME] = ROUND_POWER_OF_TWO( - 3 * rc->avg_frame_qindex[INTER_FRAME] + cm->base_qindex, 2); + rc->last_q[INTER_FRAME] = qindex; + rc->avg_frame_qindex[INTER_FRAME] = + ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2); rc->ni_frames++; - rc->tot_q += vp9_convert_qindex_to_q(cm->base_qindex); - rc->avg_q = rc->tot_q / (double)rc->ni_frames; - + 
rc->tot_q += vp9_convert_qindex_to_q(qindex); + rc->avg_q = rc->tot_q / rc->ni_frames; // Calculate the average Q for normal inter frames (not key or GFU frames). - rc->ni_tot_qi += cm->base_qindex; + rc->ni_tot_qi += qindex; rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames; } @@ -1107,11 +1137,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // If all mbs in this group are skipped only update if the Q value is // better than that already stored. // This is used to help set quality in forced key frames to reduce popping - if ((cm->base_qindex < rc->last_boosted_qindex) || + if ((qindex < rc->last_boosted_qindex) || ((cpi->static_mb_pct < 100) && ((cm->frame_type == KEY_FRAME) || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) { - rc->last_boosted_qindex = cm->base_qindex; + rc->last_boosted_qindex = qindex; } update_buffer_level(cpi, rc->projected_frame_size); @@ -1196,7 +1226,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { int target; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || - (cm->frame_flags & FRAMEFLAGS_KEY) || + (cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; @@ -1258,17 +1288,25 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const RATE_CONTROL *rc = &cpi->rc; + const VP9_CONFIG *oxcf = &cpi->oxcf; + const SVC *const svc = &cpi->svc; int target; - if (cpi->common.current_video_frame == 0) { target = ((cpi->oxcf.starting_buffer_level / 2) > INT_MAX) ? 
INT_MAX : (int)(cpi->oxcf.starting_buffer_level / 2); } else { - const int initial_boost = 32; - int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16)); - if (rc->frames_since_key < cpi->output_framerate / 2) { + int kf_boost = 32; + double framerate = oxcf->framerate; + if (svc->number_temporal_layers > 1 && + oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + // Use the layer framerate for temporal layers CBR mode. + const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id]; + framerate = lc->framerate; + } + kf_boost = MAX(kf_boost, (int)(2 * framerate - 16)); + if (rc->frames_since_key < framerate / 2) { kf_boost = (int)(kf_boost * rc->frames_since_key / - (cpi->output_framerate / 2)); + (framerate / 2)); } target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4; } @@ -1280,7 +1318,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; int target = rc->av_per_frame_bandwidth; if ((cm->current_video_frame == 0) || - (cm->frame_flags & FRAMEFLAGS_KEY) || + (cpi->frame_flags & FRAMEFLAGS_KEY) || (cpi->oxcf.auto_key && (rc->frames_since_key % cpi->key_frame_frequency == 0))) { cm->frame_type = KEY_FRAME; @@ -1304,7 +1342,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; int target; if ((cm->current_video_frame == 0 || - (cm->frame_flags & FRAMEFLAGS_KEY) || + (cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; @@ -1366,3 +1404,46 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, return target_index - qindex; } + +void vp9_rc_update_framerate(VP9_COMP *cpi) { + const VP9_COMMON *const cm = &cpi->common; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + RATE_CONTROL *const rc = &cpi->rc; + int vbr_max_bits; + + rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / oxcf->framerate); + rc->min_frame_bandwidth = 
(int)(rc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmin_section / 100); + + rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); + + // A maximum bitrate for a frame is defined. + // The baseline for this aligns with HW implementations that + // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits + // per 16x16 MB (averaged over a frame). However this limit is extended if + // a very high rate is given on the command line or the rate cannot + // be achieved because of a user specified max q (e.g. when the user + // specifies lossless encode). + vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth * + oxcf->two_pass_vbrmax_section) / 100); + rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), + vbr_max_bits); + + // Set Maximum gf/arf interval + rc->max_gf_interval = 16; + + // Extended interval for genuinely static scenes + rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; + + // Special conditions when alt ref frame enabled in lagged compress mode + if (oxcf->play_alternate && oxcf->lag_in_frames) { + if (rc->max_gf_interval > oxcf->lag_in_frames - 1) + rc->max_gf_interval = oxcf->lag_in_frames - 1; + + if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) + rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; + } + + if (rc->max_gf_interval > rc->static_scene_max_gf_interval) + rc->max_gf_interval = rc->static_scene_max_gf_interval; +} diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 7693c2b13..cf6526b8b 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -125,8 +125,7 @@ void vp9_rc_get_svc_params(struct VP9_COMP *cpi); // Post encode update of the rate control parameters based // on bytes used -void vp9_rc_postencode_update(struct VP9_COMP *cpi, - uint64_t bytes_used); +void vp9_rc_postencode_update(struct VP9_COMP *cpi, uint64_t bytes_used); // Post encode update of the rate control parameters for 
dropped frames void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi); @@ -175,6 +174,8 @@ int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget); int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, int qindex, double rate_target_ratio); +void vp9_rc_update_framerate(struct VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 1d70c1f9e..7ef21fa5d 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -81,7 +81,7 @@ struct rdcost_block_args { const scan_order *so; }; -const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { +static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {NEARESTMV, {LAST_FRAME, NONE}}, {NEARESTMV, {ALTREF_FRAME, NONE}}, {NEARESTMV, {GOLDEN_FRAME, NONE}}, @@ -121,7 +121,7 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {D45_PRED, {INTRA_FRAME, NONE}}, }; -const REF_DEFINITION vp9_ref_order[MAX_REFS] = { +static const REF_DEFINITION vp9_ref_order[MAX_REFS] = { {{LAST_FRAME, NONE}}, {{GOLDEN_FRAME, NONE}}, {{ALTREF_FRAME, NONE}}, @@ -134,8 +134,9 @@ const REF_DEFINITION vp9_ref_order[MAX_REFS] = { // certain modes are assumed to be based on 8x8 blocks. // This table is used to correct for blocks size. // The factors here are << 2 (2 = x0.5, 32 = x8 etc). 
-static int rd_thresh_block_size_factor[BLOCK_SIZES] = - {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32}; +static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = { + 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32 +}; static int raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block, int stride) { @@ -192,7 +193,7 @@ static void fill_token_costs(vp9_coeff_cost *c, } } -static const int rd_iifactor[32] = { +static const uint8_t rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -244,6 +245,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { static void set_block_thresholds(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd = &cpi->rd; int i, bsize, segment_id; for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { @@ -259,14 +261,14 @@ static void set_block_thresholds(VP9_COMP *cpi) { const int thresh_max = INT_MAX / t; for (i = 0; i < MAX_MODES; ++i) - cpi->rd_threshes[segment_id][bsize][i] = - cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4 + rd->threshes[segment_id][bsize][i] = + rd->thresh_mult[i] < thresh_max ? rd->thresh_mult[i] * t / 4 : INT_MAX; for (i = 0; i < MAX_REFS; ++i) { - cpi->rd_thresh_sub8x8[segment_id][bsize][i] = - cpi->rd_thresh_mult_sub8x8[i] < thresh_max - ? cpi->rd_thresh_mult_sub8x8[i] * t / 4 + rd->thresh_sub8x8[segment_id][bsize][i] = + rd->thresh_mult_sub8x8[i] < thresh_max + ? 
rd->thresh_mult_sub8x8[i] * t / 4 : INT_MAX; } } @@ -280,10 +282,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { vp9_clear_system_state(); - cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128) - cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); + cpi->rd.RDDIV = RDDIV_BITS; // in bits (to multiply D by 128) + cpi->rd.RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); - x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO; + x->errorperbit = cpi->rd.RDMULT / RD_MULT_EPB_RATIO; x->errorperbit += (x->errorperbit == 0); x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && @@ -1675,14 +1677,14 @@ static INLINE int mv_has_subpel(const MV *mv) { static int check_best_zero_mv( const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], - int disable_inter_mode_mask, int this_mode, int ref_frame, - int second_ref_frame) { + int disable_inter_mode_mask, int this_mode, + const MV_REFERENCE_FRAME ref_frames[2]) { if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) && (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && - frame_mv[this_mode][ref_frame].as_int == 0 && - (second_ref_frame == NONE || - frame_mv[this_mode][second_ref_frame].as_int == 0)) { - int rfc = mode_context[ref_frame]; + frame_mv[this_mode][ref_frames[0]].as_int == 0 && + (ref_frames[1] == NONE || + frame_mv[this_mode][ref_frames[1]].as_int == 0)) { + int rfc = mode_context[ref_frames[0]]; int c1 = cost_mv_ref(cpi, NEARMV, rfc); int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); int c3 = cost_mv_ref(cpi, ZEROMV, rfc); @@ -1693,15 +1695,15 @@ static int check_best_zero_mv( if (c2 > c3) return 0; } else { assert(this_mode == ZEROMV); - if (second_ref_frame == NONE) { - if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) || - (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0)) + if (ref_frames[1] == NONE) { + if ((c3 >= c2 && 
frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0)) return 0; } else { - if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 && - frame_mv[NEARESTMV][second_ref_frame].as_int == 0) || - (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 && - frame_mv[NEARMV][second_ref_frame].as_int == 0)) + if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 && + frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) || + (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 && + frame_mv[NEARMV][ref_frames[1]].as_int == 0)) return 0; } } @@ -1779,8 +1781,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, disable_inter_mode_mask, - this_mode, mbmi->ref_frame[0], - mbmi->ref_frame[1])) + this_mode, mbmi->ref_frame)) continue; vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre)); @@ -1890,7 +1891,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, x->mv_row_min, x->mv_row_max); thissme = cpi->full_search_sad(x, &mvp_full, sadpb, 16, v_fn_ptr, - x->nmvjointcost, x->mvcost, &bsi->ref_mv[0]->as_mv, &best_mv->as_mv); if (thissme < bestsme) { @@ -2267,7 +2267,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int_mv *ref_mv, int_mv *second_ref_mv, int64_t comp_pred_diff[REFERENCE_MODES], - int64_t tx_size_diff[TX_MODES], + const int64_t tx_size_diff[TX_MODES], int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) { MACROBLOCKD *const xd = &x->e_mbd; @@ -2579,7 +2579,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, struct buf_2d ref_yv12[2]; int bestsme = INT_MAX; int sadpb = x->sadperbit16; - int_mv tmp_mv; + MV tmp_mv; int search_range = 3; int tmp_col_min = x->mv_col_min; @@ -2608,20 +2608,19 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, vp9_set_mv_search_range(x, &ref_mv[id].as_mv); // Use mv result from single mode as mvp. 
- tmp_mv.as_int = frame_mv[refs[id]].as_int; + tmp_mv = frame_mv[refs[id]].as_mv; - tmp_mv.as_mv.col >>= 3; - tmp_mv.as_mv.row >>= 3; + tmp_mv.col >>= 3; + tmp_mv.row >>= 3; // Small-range full-pixel motion search - bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb, + bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, search_range, &cpi->fn_ptr[bsize], - x->nmvjointcost, x->mvcost, &ref_mv[id].as_mv, second_pred, pw, ph); if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv, + bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv, second_pred, &cpi->fn_ptr[bsize], 1); x->mv_col_min = tmp_col_min; @@ -2633,7 +2632,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; bestsme = cpi->find_fractional_mv_step_comp( - x, &tmp_mv.as_mv, + x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, @@ -2648,7 +2647,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[0].pre[0] = scaled_first_yv12; if (bestsme < last_besterr[id]) { - frame_mv[refs[id]].as_int = tmp_mv.as_int; + frame_mv[refs[id]].as_mv = tmp_mv; last_besterr[id] = bestsme; } else { break; @@ -2698,6 +2697,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t *psse, const int64_t ref_best_rd) { VP9_COMMON *cm = &cpi->common; + RD_OPT *rd_opt = &cpi->rd; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; const int is_comp_pred = has_second_ref(mbmi); @@ -2795,14 +2795,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used - cpi->mask_filter_rd = 0; + rd_opt->mask_filter = 0; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - cpi->rd_filter_cache[i] = INT64_MAX; + rd_opt->filter_cache[i] = INT64_MAX; if (cm->interp_filter != BILINEAR) { 
*best_filter = EIGHTTAP; - if (x->source_variance < - cpi->sf.disable_filter_search_var_thresh) { + if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { *best_filter = EIGHTTAP; } else { int newbest; @@ -2818,12 +2817,12 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (i > 0 && intpel_mv) { rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); - cpi->rd_filter_cache[i] = rd; - cpi->rd_filter_cache[SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); + rd_opt->filter_cache[i] = rd; + rd_opt->filter_cache[SWITCHABLE_FILTERS] = + MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); if (cm->interp_filter == SWITCHABLE) rd += rs_rd; - cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); + rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd); } else { int rate_sum = 0; int64_t dist_sum = 0; @@ -2843,12 +2842,12 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); - cpi->rd_filter_cache[i] = rd; - cpi->rd_filter_cache[SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); + rd_opt->filter_cache[i] = rd; + rd_opt->filter_cache[SWITCHABLE_FILTERS] = + MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); if (cm->interp_filter == SWITCHABLE) rd += rs_rd; - cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); + rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd); if (i == 0 && intpel_mv) { tmp_rate_sum = rate_sum; @@ -3125,6 +3124,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; @@ -3145,7 +3145,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t 
best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MB_MODE_INFO best_mbmode = { 0 }; - int mode_index, best_mode_index = 0; + int mode_index, best_mode_index = -1; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; int64_t best_intra_rd = INT64_MAX; @@ -3164,8 +3164,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int best_skip2 = 0; int mode_skip_mask = 0; int mode_skip_start = cpi->sf.mode_skip_start + 1; - const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize]; - const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize]; + const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; + const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize]; const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; const int intra_y_mode_mask = cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]; @@ -3302,7 +3302,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. 
- if (mode_index == mode_skip_start && best_rd < INT64_MAX) { + if (mode_index == mode_skip_start && best_mode_index >= 0) { switch (vp9_mode_order[best_mode_index].ref_frame[0]) { case INTRA_FRAME: break; @@ -3339,6 +3339,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && + best_mode_index >=0 && vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) continue; if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) && @@ -3366,7 +3367,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // one of the neighboring directional modes if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && (this_mode >= D45_PRED && this_mode <= TM_PRED)) { - if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME) + if (best_mode_index >= 0 && + vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME) continue; } if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { @@ -3376,11 +3378,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } else { if (x->in_active_map && - !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) + !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame}; if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv, - disable_inter_mode_mask, this_mode, ref_frame, - second_ref_frame)) + disable_inter_mode_mask, this_mode, ref_frames)) continue; + } } mbmi->mode = this_mode; @@ -3608,21 +3611,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best filter type */ if (!mode_excluded && cm->interp_filter != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ? 
SWITCHABLE_FILTERS : cm->interp_filter]; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { int64_t adj_rd; if (ref == INT64_MAX) adj_rd = 0; - else if (cpi->rd_filter_cache[i] == INT64_MAX) + else if (rd_opt->filter_cache[i] == INT64_MAX) // when early termination is triggered, the encoder does not have // access to the rate-distortion cost. it only knows that the cost // should be above the maximum valid value. hence it takes the known // maximum plus an arbitrary constant as the rate-distortion cost. - adj_rd = cpi->mask_filter_rd - ref + 10; + adj_rd = rd_opt->mask_filter - ref + 10; else - adj_rd = cpi->rd_filter_cache[i] - ref; + adj_rd = rd_opt->filter_cache[i] - ref; adj_rd += this_rd; best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); @@ -3654,7 +3657,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, break; } - if (best_rd >= best_rd_so_far) + if (best_mode_index < 0 || best_rd >= best_rd_so_far) return INT64_MAX; // If we used an estimate for the uv intra rd in the loop above... @@ -3684,7 +3687,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // combination that wins out. 
if (cpi->sf.adaptive_rd_thresh) { for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { - int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index]; + int *const fact = &rd_opt->thresh_freq_fact[bsize][mode_index]; if (mode_index == best_mode_index) { *fact -= (*fact >> 3); @@ -3756,6 +3759,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; + RD_OPT *const rd_opt = &cpi->rd; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct segmentation *const seg = &cm->seg; @@ -3768,8 +3772,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, VP9_ALT_FLAG }; int64_t best_rd = best_rd_so_far; int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise - int64_t best_tx_rd[TX_MODES]; - int64_t best_tx_diff[TX_MODES]; + static const int64_t best_tx_diff[TX_MODES] = { 0 }; int64_t best_pred_diff[REFERENCE_MODES]; int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; @@ -3781,10 +3784,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_inter_rd = INT64_MAX; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; INTERP_FILTER tmp_best_filter = SWITCHABLE; - int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; - int64_t dist_uv[TX_SIZES]; - int skip_uv[TX_SIZES]; - MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 }; + int rate_uv_intra, rate_uv_tokenonly; + int64_t dist_uv; + int skip_uv; + MB_PREDICTION_MODE mode_uv = DC_PRED; int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q); int_mv seg_mvs[4][MAX_REF_FRAMES]; b_mode_info best_bmodes[4]; @@ -3806,12 +3809,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; - for (i = 0; i < TX_MODES; i++) - best_tx_rd[i] = INT64_MAX; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) 
best_filter_rd[i] = INT64_MAX; - for (i = 0; i < TX_SIZES; i++) - rate_uv_intra[i] = INT_MAX; + rate_uv_intra = INT_MAX; *returnrate = INT_MAX; @@ -3845,7 +3845,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; int skippable = 0; - int64_t tx_cache[TX_MODES]; int i; int this_skip2 = 0; int64_t total_sse = INT_MAX; @@ -3882,9 +3881,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // Test best rd so far against threshold for trying this mode. if ((best_rd < - ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] * - cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) || - cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX) + ((int64_t)rd_opt->thresh_sub8x8[segment_id][bsize][mode_index] * + rd_opt->thresh_freq_sub8x8[bsize][mode_index] >> 5)) || + rd_opt->thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX) continue; if (ref_frame > INTRA_FRAME && @@ -3911,10 +3910,11 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // TODO(jingning, jkoleszar): scaling reference frame not supported for // sub8x8 blocks. 
- if (ref_frame > NONE && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) + if (ref_frame > INTRA_FRAME && + vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) continue; - if (second_ref_frame > NONE && + if (second_ref_frame > INTRA_FRAME && vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) continue; @@ -3968,9 +3968,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } - for (i = 0; i < TX_MODES; ++i) - tx_cache[i] = INT64_MAX; - #ifdef MODE_TEST_HIT_STATS // TEST/DEBUG CODE // Keep a rcord of the number of test hits at each size @@ -3986,21 +3983,18 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate2 += intra_cost_penalty; distortion2 += distortion_y; - if (rate_uv_intra[TX_4X4] == INT_MAX) { + if (rate_uv_intra == INT_MAX) { choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4, - &rate_uv_intra[TX_4X4], - &rate_uv_tokenonly[TX_4X4], - &dist_uv[TX_4X4], &skip_uv[TX_4X4], - &mode_uv[TX_4X4]); + &rate_uv_intra, + &rate_uv_tokenonly, + &dist_uv, &skip_uv, + &mode_uv); } - rate2 += rate_uv_intra[TX_4X4]; - rate_uv = rate_uv_tokenonly[TX_4X4]; - distortion2 += dist_uv[TX_4X4]; - distortion_uv = dist_uv[TX_4X4]; - mbmi->uv_mode = mode_uv[TX_4X4]; - tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - for (i = 0; i < TX_MODES; ++i) - tx_cache[i] = tx_cache[ONLY_4X4]; + rate2 += rate_uv_intra; + rate_uv = rate_uv_tokenonly; + distortion2 += dist_uv; + distortion_uv = dist_uv; + mbmi->uv_mode = mode_uv; } else { int rate; int64_t distortion; @@ -4019,14 +4013,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int uv_skippable; this_rd_thresh = (ref_frame == LAST_FRAME) ? - cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] : - cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR]; + rd_opt->thresh_sub8x8[segment_id][bsize][THR_LAST] : + rd_opt->thresh_sub8x8[segment_id][bsize][THR_ALTR]; this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? 
- cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh; - - cpi->mask_filter_rd = 0; + rd_opt->thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh; + rd_opt->mask_filter = 0; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - cpi->rd_filter_cache[i] = INT64_MAX; + rd_opt->filter_cache[i] = INT64_MAX; if (cm->interp_filter != BILINEAR) { tmp_best_filter = EIGHTTAP; @@ -4059,14 +4052,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, continue; rs = vp9_get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); - cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; - cpi->rd_filter_cache[SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], + rd_opt->filter_cache[switchable_filter_index] = tmp_rd; + rd_opt->filter_cache[SWITCHABLE_FILTERS] = + MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; - cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd); + rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd); newbest = (tmp_rd < tmp_best_rd); if (newbest) { @@ -4163,10 +4156,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion_uv; skippable = skippable && uv_skippable; total_sse += uv_sse; - - tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - for (i = 0; i < TX_MODES; ++i) - tx_cache[i] = tx_cache[ONLY_4X4]; } } @@ -4304,43 +4293,26 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best filter type */ if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && cm->interp_filter != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ? 
SWITCHABLE_FILTERS : cm->interp_filter]; int64_t adj_rd; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { if (ref == INT64_MAX) adj_rd = 0; - else if (cpi->rd_filter_cache[i] == INT64_MAX) + else if (rd_opt->filter_cache[i] == INT64_MAX) // when early termination is triggered, the encoder does not have // access to the rate-distortion cost. it only knows that the cost // should be above the maximum valid value. hence it takes the known // maximum plus an arbitrary constant as the rate-distortion cost. - adj_rd = cpi->mask_filter_rd - ref + 10; + adj_rd = rd_opt->mask_filter - ref + 10; else - adj_rd = cpi->rd_filter_cache[i] - ref; + adj_rd = rd_opt->filter_cache[i] - ref; adj_rd += this_rd; best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd); } } - /* keep record of best txfm size */ - tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4]; - tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8]; - tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16]; - if (!mode_excluded && this_rd != INT64_MAX) { - for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) { - int64_t adj_rd = INT64_MAX; - if (ref_frame > INTRA_FRAME) - adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode]; - else - adj_rd = this_rd; - - if (adj_rd < best_tx_rd[i]) - best_tx_rd[i] = adj_rd; - } - } - if (early_term) break; @@ -4355,14 +4327,12 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->sf.use_uv_intra_rd_estimate) { // Do Intra UV best rd mode selection if best mode choice above was intra. 
if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) { - TX_SIZE uv_tx_size; *mbmi = best_mbmode; - uv_tx_size = get_uv_tx_size(mbmi); - rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], - &rate_uv_tokenonly[uv_tx_size], - &dist_uv[uv_tx_size], - &skip_uv[uv_tx_size], - BLOCK_8X8, uv_tx_size); + rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, + &rate_uv_tokenonly, + &dist_uv, + &skip_uv, + BLOCK_8X8, TX_4X4); } } @@ -4383,7 +4353,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, // combination that wins out. if (cpi->sf.adaptive_rd_thresh) { for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) { - int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index]; + int *const fact = &rd_opt->thresh_freq_sub8x8[bsize][mode_index]; if (mode_index == best_mode_index) { *fact -= (*fact >> 3); @@ -4424,15 +4394,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); - for (i = 0; i < TX_MODES; i++) { - if (best_tx_rd[i] == INT64_MAX) - best_tx_diff[i] = 0; - else - best_tx_diff[i] = best_rd - best_tx_rd[i]; - } } else { vp9_zero(best_filter_diff); - vp9_zero(best_tx_diff); } set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index d6b6174fa..f0bd8a147 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -13,32 +13,39 @@ #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_speed_features.h" -#define ALL_INTRA_MODES ((1 << DC_PRED) | \ - (1 << V_PRED) | (1 << H_PRED) | \ - (1 << D45_PRED) | (1 << D135_PRED) | \ - (1 << D117_PRED) | (1 << D153_PRED) | \ - (1 << D207_PRED) | (1 << D63_PRED) | \ - (1 << TM_PRED)) -#define INTRA_DC_ONLY (1 << DC_PRED) -#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED)) -#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED)) 
-#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)) - -// Masks for partially or completely disabling split mode -#define DISABLE_ALL_INTER_SPLIT ((1 << THR_COMP_GA) | \ - (1 << THR_COMP_LA) | \ - (1 << THR_ALTR) | \ - (1 << THR_GOLD) | \ - (1 << THR_LAST)) - -#define DISABLE_ALL_SPLIT ((1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT) - -#define DISABLE_COMPOUND_SPLIT ((1 << THR_COMP_GA) | (1 << THR_COMP_LA)) - -#define LAST_AND_INTRA_SPLIT_ONLY ((1 << THR_COMP_GA) | \ - (1 << THR_COMP_LA) | \ - (1 << THR_ALTR) | \ - (1 << THR_GOLD)) +enum { + ALL_INTRA_MODES = (1 << DC_PRED) | + (1 << V_PRED) | (1 << H_PRED) | + (1 << D45_PRED) | (1 << D135_PRED) | + (1 << D117_PRED) | (1 << D153_PRED) | + (1 << D207_PRED) | (1 << D63_PRED) | + (1 << TM_PRED), + + INTRA_DC_ONLY = (1 << DC_PRED), + + INTRA_DC_TM = (1 << TM_PRED) | (1 << DC_PRED), + + INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED), + + INTRA_DC_TM_H_V = INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED) +}; + +enum { + DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | + (1 << THR_COMP_LA) | + (1 << THR_ALTR) | + (1 << THR_GOLD) | + (1 << THR_LAST), + + DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT, + + DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA), + + LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | + (1 << THR_COMP_LA) | + (1 << THR_ALTR) | + (1 << THR_GOLD) +}; static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { @@ -49,8 +56,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, if (speed >= 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); sf->less_rectangular_check = 1; - sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD - : USE_LARGESTALL; + sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD + : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT @@ -73,8 +80,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, } if (speed >= 2) { - sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD - : USE_LARGESTALL; + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT @@ -269,9 +276,6 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, sf->partition_search_type = SOURCE_VAR_BASED_PARTITION; sf->search_type_check_frequency = 50; sf->source_var_thresh = 360; - - sf->use_nonrd_pick_mode = 1; - sf->search_method = FAST_DIAMOND; } if (speed >= 7) { diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 72f548a04..55422979a 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -344,7 +344,7 @@ typedef struct { int search_type_check_frequency; // The threshold used in SOURCE_VAR_BASED_PARTITION search type. - int source_var_thresh; + unsigned int source_var_thresh; } SPEED_FEATURES; struct VP9_COMP; diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index c2b6263f0..9b3fc6eab 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -178,7 +178,6 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; - cpi->output_framerate = lc->framerate; // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). 
if (cpi->svc.number_temporal_layers > 1) { @@ -197,7 +196,6 @@ void vp9_save_layer_context(VP9_COMP *const cpi) { lc->starting_buffer_level = oxcf->starting_buffer_level; lc->optimal_buffer_level = oxcf->optimal_buffer_level; lc->maximum_buffer_size = oxcf->maximum_buffer_size; - lc->framerate = cpi->output_framerate; } void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 62e20dc00..c9e39a1a2 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -45,12 +45,6 @@ typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr, int ref_stride, unsigned int *sad_array); -typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int ref_stride, - unsigned int *sad_array); - typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr, int source_stride, const uint8_t* const ref_ptr[], @@ -96,7 +90,7 @@ typedef struct vp9_variance_vtable { vp9_variance_fn_t svf_halfpix_v; vp9_variance_fn_t svf_halfpix_hv; vp9_sad_multi_fn_t sdx3f; - vp9_sad_multi1_fn_t sdx8f; + vp9_sad_multi_fn_t sdx8f; vp9_sad_multi_d_fn_t sdx4df; } vp9_variance_fn_ptr_t; diff --git a/vp9/encoder/x86/vp9_mcomp_x86.h b/vp9/encoder/x86/vp9_mcomp_x86.h deleted file mode 100644 index c15039ad8..000000000 --- a/vp9/encoder/x86/vp9_mcomp_x86.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -#ifndef VP9_ENCODER_X86_VP9_MCOMP_X86_H_ -#define VP9_ENCODER_X86_VP9_MCOMP_X86_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#if HAVE_SSE3 -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_search_full_search -#define vp9_search_full_search vp9_full_search_sadx3 - -#undef vp9_search_refining_search -#define vp9_search_refining_search vp9_refining_search_sadx4 - -#undef vp9_search_diamond_search -#define vp9_search_diamond_search vp9_diamond_search_sadx4 - -#endif -#endif - -#if HAVE_SSE4_1 -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_search_full_search -#define vp9_search_full_search vp9_full_search_sadx8 - -#endif -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_X86_VP9_MCOMP_X86_H_ - diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 0623ad132..967431c0f 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -168,6 +168,11 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); + if (cfg->rc_resize_allowed == 1) { + RANGE_CHECK(cfg, rc_scaled_width, 1, cfg->g_w); + RANGE_CHECK(cfg, rc_scaled_height, 1, cfg->g_h); + } + RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); if (cfg->ts_number_layers > 1) { @@ -332,6 +337,10 @@ static vpx_codec_err_t set_encoder_config( oxcf->under_shoot_pct = cfg->rc_undershoot_pct; oxcf->over_shoot_pct = cfg->rc_overshoot_pct; + oxcf->allow_spatial_resampling = cfg->rc_resize_allowed; + oxcf->scaled_frame_width = cfg->rc_scaled_width; + oxcf->scaled_frame_height = cfg->rc_scaled_height; + oxcf->maximum_buffer_size = cfg->rc_buf_sz; oxcf->starting_buffer_level = cfg->rc_buf_initial_sz; oxcf->optimal_buffer_level = cfg->rc_buf_optimal_sz; @@ -410,6 +419,9 @@ static vpx_codec_err_t set_encoder_config( printf("fixed_q: %d\n", oxcf->fixed_q); printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q); 
printf("best_allowed_q: %d\n", oxcf->best_allowed_q); + printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling); + printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width); + printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height); printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias); printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section); printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section); @@ -1128,6 +1140,8 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { 0, // rc_dropframe_thresh 0, // rc_resize_allowed + 1, // rc_scaled_width + 1, // rc_scaled_height 60, // rc_resize_down_thresold 30, // rc_resize_up_thresold diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index da6c0f8b6..24b8d9de1 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -87,8 +87,6 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h - -VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_mcomp_x86.h VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm |