diff options
Diffstat (limited to 'vp9/encoder')
46 files changed, 1804 insertions, 1161 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 2ab4c7907..ede744e7f 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -124,8 +124,8 @@ static int write_skip_coeff(const VP9_COMP *cpi, int segment_id, MODE_INFO *m, void vp9_update_skip_probs(VP9_COMMON *cm, vp9_writer *w) { int k; - for (k = 0; k < MBSKIP_CONTEXTS; ++k) - vp9_cond_prob_diff_update(w, &cm->fc.mbskip_probs[k], cm->counts.mbskip[k]); + for (k = 0; k < SKIP_CONTEXTS; ++k) + vp9_cond_prob_diff_update(w, &cm->fc.skip_probs[k], cm->counts.skip[k]); } static void update_switchable_interp_probs(VP9_COMP *cpi, vp9_writer *w) { @@ -135,11 +135,6 @@ static void update_switchable_interp_probs(VP9_COMP *cpi, vp9_writer *w) { prob_diff_update(vp9_switchable_interp_tree, cm->fc.switchable_interp_prob[j], cm->counts.switchable_interp[j], SWITCHABLE_FILTERS, w); - -#ifdef MODE_STATS - if (!cpi->dummy_packing) - update_switchable_interp_stats(cm); -#endif } static void pack_mb_tokens(vp9_writer* const w, @@ -330,13 +325,13 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) { } } - if (cm->mcomp_filter_type == SWITCHABLE) { + if (cm->interp_filter == SWITCHABLE) { const int ctx = vp9_get_pred_context_switchable_interp(xd); vp9_write_token(bc, vp9_switchable_interp_tree, cm->fc.switchable_interp_prob[ctx], &switchable_interp_encodings[mi->interp_filter]); } else { - assert(mi->interp_filter == cm->mcomp_filter_type); + assert(mi->interp_filter == cm->interp_filter); } if (bsize < BLOCK_8X8) { @@ -912,24 +907,20 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) { vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j], ct_32x32p[j]); } -#ifdef MODE_STATS - if (!cpi->dummy_packing) - update_tx_count_stats(cm); -#endif } } -static void write_interp_filter_type(INTERPOLATION_TYPE type, - struct vp9_write_bit_buffer *wb) { - const int type_to_literal[] = { 1, 0, 2, 3 }; +static void write_interp_filter(INTERP_FILTER filter, + struct vp9_write_bit_buffer *wb) { + const int filter_to_literal[] = { 1, 0, 2, 3 }; - vp9_wb_write_bit(wb, type == SWITCHABLE); - if (type != SWITCHABLE) - vp9_wb_write_literal(wb, type_to_literal[type], 2); + vp9_wb_write_bit(wb, filter == SWITCHABLE); + if (filter != SWITCHABLE) + vp9_wb_write_literal(wb, filter_to_literal[filter], 2); } -static void fix_mcomp_filter_type(VP9_COMMON *cm) { - if (cm->mcomp_filter_type == SWITCHABLE) { +static void fix_interp_filter(VP9_COMMON *cm) { + if (cm->interp_filter == SWITCHABLE) { // Check to see if only one of the filters is actually used int count[SWITCHABLE_FILTERS]; int i, j, c = 0; @@ -943,7 +934,7 @@ static void fix_mcomp_filter_type(VP9_COMMON *cm) { // Only one filter is used. So set the filter at frame level for (i = 0; i < SWITCHABLE_FILTERS; ++i) { if (count[i]) { - cm->mcomp_filter_type = i; + cm->interp_filter = i; break; } } @@ -1171,8 +1162,8 @@ static void write_uncompressed_header(VP9_COMP *cpi, vp9_wb_write_bit(wb, cm->allow_high_precision_mv); - fix_mcomp_filter_type(cm); - write_interp_filter_type(cm->mcomp_filter_type, wb); + fix_interp_filter(cm); + write_interp_filter(cm->interp_filter, wb); } } @@ -1223,7 +1214,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { vp9_zero(cm->counts.inter_mode); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) update_switchable_interp_probs(cpi, &header_bc); for (i = 0; i < INTRA_INTER_CONTEXTS; i++) diff --git a/vp9/encoder/vp9_bitstream.h b/vp9/encoder/vp9_bitstream.h index 52be50272..94bec8a43 100644 --- a/vp9/encoder/vp9_bitstream.h +++ b/vp9/encoder/vp9_bitstream.h @@ -12,6 +12,14 @@ #ifndef VP9_ENCODER_VP9_BITSTREAM_H_ #define VP9_ENCODER_VP9_BITSTREAM_H_ +#ifdef __cplusplus +extern "C" { +#endif + void vp9_update_skip_probs(VP9_COMMON *cm, vp9_writer *bc); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_BITSTREAM_H_ diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index c1b95817f..713cc5132 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -17,6 +17,10 @@ #include "vpx_ports/mem.h" #include "vp9/common/vp9_onyxc_int.h" +#ifdef __cplusplus +extern "C" { +#endif + // motion search site typedef struct { MV mv; @@ -58,7 +62,7 @@ typedef struct { // motion vector cache for adaptive motion search control in partition // search loop int_mv pred_mv[MAX_REF_FRAMES]; - int pred_filter_type; + INTERP_FILTER pred_interp_filter; // Bit flag for each mode whether it has high error in comparison to others. unsigned int modes_with_high_error; @@ -235,23 +239,8 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, BLOCK_SIZE bsize) { } } -struct rdcost_block_args { - MACROBLOCK *x; - ENTROPY_CONTEXT t_above[16]; - ENTROPY_CONTEXT t_left[16]; - TX_SIZE tx_size; - int bw; - int bh; - int rate; - int64_t dist; - int64_t sse; - int this_rate; - int64_t this_dist; - int64_t this_sse; - int64_t this_rd; - int64_t best_rd; - int skip; - const int16_t *scan, *nb; -}; +#ifdef __cplusplus +} // extern "C" +#endif #endif // VP9_ENCODER_VP9_BLOCK_H_ diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 0f4a6bb63..a840b480a 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -997,7 +997,7 @@ static INLINE int half_round_shift(int input) { return rv; } -static void dct32_1d(const int *input, int *output, int round) { +static void fdct32(const int *input, int *output, int round) { int step[32]; // Stage 1 step[0] = input[0] + input[(32 - 1)]; @@ -1329,7 +1329,7 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; - dct32_1d(temp_in, temp_out, 0); + fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; } @@ -1339,13 +1339,13 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; - dct32_1d(temp_in, temp_out, 0); + fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; } } -// Note that although we use dct_32_round in dct32_1d computation flow, +// Note that although we use dct_32_round in dct32 computation flow, // this 2d fdct32x32 for rate-distortion optimization loop is operating // within 16 bits precision. void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { @@ -1357,7 +1357,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; - dct32_1d(temp_in, temp_out, 0); + fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) // TODO(cd): see quality impact of only doing // output[j * 32 + i] = (temp_out[j] + 1) >> 2; @@ -1370,7 +1370,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; - dct32_1d(temp_in, temp_out, 1); + fdct32(temp_in, temp_out, 1); for (j = 0; j < 32; ++j) out[j + i * 32] = temp_out[j]; } diff --git a/vp9/encoder/vp9_dct.h b/vp9/encoder/vp9_dct.h index aaf976d93..cf5f001a9 100644 --- a/vp9/encoder/vp9_dct.h +++ b/vp9/encoder/vp9_dct.h @@ -12,6 +12,10 @@ #ifndef VP9_ENCODER_VP9_DCT_H_ #define VP9_ENCODER_VP9_DCT_H_ +#ifdef __cplusplus +extern "C" { +#endif + void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output, int stride); @@ -21,4 +25,8 @@ void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output, void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, int stride); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_DCT_H_ diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 9d02c8f95..7b6da6c39 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -380,8 +380,10 @@ static void select_in_frame_q_segment(VP9_COMP *cpi, segment = 0; } - complexity_metric = - clamp((int)((projected_rate * 64) / target_rate), 16, 255); + if (target_rate > 0) { + complexity_metric = + clamp((int)((projected_rate * 64) / target_rate), 16, 255); + } } // Fill in the entires in the segment map corresponding to this SB64 @@ -505,7 +507,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, vp9_update_mv_count(cpi, x, best_mv); } - if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) { + if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) { const int ctx = vp9_get_pred_context_switchable_interp(xd); ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; } @@ -1029,131 +1031,171 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { } return 0; } - -// TODO(jingning) This currently serves as a test framework for non-RD mode -// decision. To be continued on optimizing the partition type decisions. -static void pick_partition_type(VP9_COMP *cpi, - const TileInfo *const tile, - MODE_INFO **mi_8x8, TOKENEXTRA **tp, - int mi_row, int mi_col, - BLOCK_SIZE bsize, int *rate, int64_t *dist, - int do_recon) { +static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, + BLOCK_SIZE bsize, int output_enabled) { + int i; VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int mi_stride = cm->mode_info_stride; - const int num_8x8_subsize = (num_8x8_blocks_wide_lookup[bsize] >> 1); - int i; - PARTITION_TYPE partition = PARTITION_NONE; - BLOCK_SIZE subsize; - BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; - int sub_rate[4] = {0}; - int64_t sub_dist[4] = {0}; - int mi_offset; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = xd->plane; + MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; + const int mb_mode_index = ctx->best_mode_index; + int max_plane; - partition = partition_lookup[b_width_log2(bsize)][bs_type]; - subsize = get_subsize(bsize, partition); + max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; + for (i = 0; i < max_plane; ++i) { + p[i].coeff = ctx->coeff_pbuf[i][1]; + p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; + pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; + p[i].eobs = ctx->eobs_pbuf[i][1]; + } + + for (i = max_plane; i < MAX_MB_PLANE; ++i) { + p[i].coeff = ctx->coeff_pbuf[i][2]; + p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; + pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; + p[i].eobs = ctx->eobs_pbuf[i][2]; + } + + x->skip = ctx->skip; + + if (frame_is_intra_only(cm)) { +#if CONFIG_INTERNAL_STATS + static const int kf_mode_index[] = { + THR_DC /*DC_PRED*/, + THR_V_PRED /*V_PRED*/, + THR_H_PRED /*H_PRED*/, + THR_D45_PRED /*D45_PRED*/, + THR_D135_PRED /*D135_PRED*/, + THR_D117_PRED /*D117_PRED*/, + THR_D153_PRED /*D153_PRED*/, + THR_D207_PRED /*D207_PRED*/, + THR_D63_PRED /*D63_PRED*/, + THR_TM /*TM_PRED*/, + }; + ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]]; +#endif + } else { + // Note how often each mode chosen as best + cpi->mode_chosen_counts[mb_mode_index]++; + if (is_inter_block(mbmi) && + (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) { + int_mv best_mv[2]; + for (i = 0; i < 1 + has_second_ref(mbmi); ++i) + best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int; + vp9_update_mv_count(cpi, x, best_mv); + } + + if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) { + const int ctx = vp9_get_pred_context_switchable_interp(xd); + ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; + } + } +} + +static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, + TOKENEXTRA **tp, int mi_row, int mi_col, + int output_enabled, BLOCK_SIZE bsize) { + MACROBLOCK *const x = &cpi->mb; if (bsize < BLOCK_8X8) { // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 // there is nothing to be done. - if (x->ab_index != 0) { - *rate = 0; - *dist = 0; + if (x->ab_index > 0) return; - } + } + set_offsets(cpi, tile, mi_row, mi_col, bsize); + update_state_rt(cpi, get_block_context(x, bsize), bsize, output_enabled); + + encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); + update_stats(cpi); + + (*tp)->token = EOSB_TOKEN; + (*tp)++; +} + +static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, + TOKENEXTRA **tp, int mi_row, int mi_col, + int output_enabled, BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + int ctx; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + if (bsize >= BLOCK_8X8) { + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int idx_str = xd->mode_info_stride * mi_row + mi_col; + MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str; + ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, + mi_row, mi_col, bsize); + subsize = mi_8x8[0]->mbmi.sb_type; + } else { - *(get_sb_partitioning(x, bsize)) = subsize; + ctx = 0; + subsize = BLOCK_4X4; } + partition = partition_lookup[bsl][subsize]; + switch (partition) { case PARTITION_NONE: - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, rate, dist, - bsize, get_block_context(x, bsize), INT64_MAX); + if (output_enabled && bsize >= BLOCK_8X8) + cm->counts.partition[ctx][PARTITION_NONE]++; + encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize); break; - case PARTITION_HORZ: + case PARTITION_VERT: + if (output_enabled) + cm->counts.partition[ctx][PARTITION_VERT]++; *get_sb_index(x, subsize) = 0; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0], - subsize, get_block_context(x, subsize), INT64_MAX); - if (bsize >= BLOCK_8X8 && mi_row + num_8x8_subsize < cm->mi_rows) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize); + if (mi_col + hbs < cm->mi_cols) { *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row + num_8x8_subsize, mi_col, - &sub_rate[1], &sub_dist[1], subsize, - get_block_context(x, subsize), INT64_MAX); + encode_b_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, + subsize); } - *rate = sub_rate[0] + sub_rate[1]; - *dist = sub_dist[0] + sub_dist[1]; break; - case PARTITION_VERT: + case PARTITION_HORZ: + if (output_enabled) + cm->counts.partition[ctx][PARTITION_HORZ]++; *get_sb_index(x, subsize) = 0; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0], - subsize, get_block_context(x, subsize), INT64_MAX); - if (bsize >= BLOCK_8X8 && mi_col + num_8x8_subsize < cm->mi_cols) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize); + if (mi_row + hbs < cm->mi_rows) { *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + num_8x8_subsize, - &sub_rate[1], &sub_dist[1], subsize, - get_block_context(x, subsize), INT64_MAX); + encode_b_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, + subsize); } - *rate = sub_rate[0] + sub_rate[1]; - *dist = sub_dist[1] + sub_dist[1]; break; case PARTITION_SPLIT: - *get_sb_index(x, subsize) = 0; - pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, subsize, - &sub_rate[0], &sub_dist[0], 0); - - if ((mi_col + num_8x8_subsize) < cm->mi_cols) { - *get_sb_index(x, subsize) = 1; - pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize, tp, - mi_row, mi_col + num_8x8_subsize, subsize, - &sub_rate[1], &sub_dist[1], 0); - } - - if ((mi_row + num_8x8_subsize) < cm->mi_rows) { - *get_sb_index(x, subsize) = 2; - pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize * mi_stride, tp, - mi_row + num_8x8_subsize, mi_col, subsize, - &sub_rate[2], &sub_dist[2], 0); - } - - if ((mi_col + num_8x8_subsize) < cm->mi_cols && - (mi_row + num_8x8_subsize) < cm->mi_rows) { - *get_sb_index(x, subsize) = 3; - mi_offset = num_8x8_subsize * mi_stride + num_8x8_subsize; - pick_partition_type(cpi, tile, mi_8x8 + mi_offset, tp, - mi_row + num_8x8_subsize, mi_col + num_8x8_subsize, - subsize, &sub_rate[3], &sub_dist[3], 0); - } - - for (i = 0; i < 4; ++i) { - *rate += sub_rate[i]; - *dist += sub_dist[i]; - } + subsize = get_subsize(bsize, PARTITION_SPLIT); + if (output_enabled) + cm->counts.partition[ctx][PARTITION_SPLIT]++; + *get_sb_index(x, subsize) = 0; + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize); + *get_sb_index(x, subsize) = 1; + encode_sb_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, + subsize); + *get_sb_index(x, subsize) = 2; + encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, + subsize); + *get_sb_index(x, subsize) = 3; + encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, + subsize); break; default: - assert(0); + assert("Invalid partition type."); } - if (do_recon) { - int output_enabled = (bsize == BLOCK_64X64); - - // Check the projected output rate for this SB against it's target - // and and if necessary apply a Q delta using segmentation to get - // closer to the target. - if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, - output_enabled, *rate); - } - - encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); - } + if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) + update_partition_context(cpi->above_seg_context, cpi->left_seg_context, + mi_row, mi_col, subsize, bsize); } static void rd_use_partition(VP9_COMP *cpi, @@ -1444,15 +1486,19 @@ static void rd_use_partition(VP9_COMP *cpi, } static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { - BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, - BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, - BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 + BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, + BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, + BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, + BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, + BLOCK_16X16 }; static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { - BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, - BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, - BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 + BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, + BLOCK_16X16, BLOCK_32X32, BLOCK_32X32, + BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, + BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, + BLOCK_64X64 }; // Look at all the mode_info entries for blocks that are part of this @@ -1538,9 +1584,11 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, } } - // Give a bit of leaway either side of the observed min and max - *min_block_size = min_partition_size[*min_block_size]; - *max_block_size = max_partition_size[*max_block_size]; + // adjust observed min and max + if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { + *min_block_size = min_partition_size[*min_block_size]; + *max_block_size = max_partition_size[*max_block_size]; + } // Check border cases where max and min from neighbours may not be legal. *max_block_size = find_partition_size(*max_block_size, @@ -1788,9 +1836,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = i; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, &this_dist, i != 3, best_rd - sum_rd); @@ -1839,9 +1887,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 0; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); @@ -1854,9 +1902,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate, &this_dist, subsize, get_block_context(x, subsize), @@ -1892,9 +1940,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 0; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); @@ -1906,9 +1954,9 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); - if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) - get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, subsize)->pred_interp_filter = get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate, &this_dist, subsize, get_block_context(x, subsize), @@ -1996,34 +2044,6 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile, restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64); } -static void encode_sb_row_rt(VP9_COMP *cpi, const TileInfo *const tile, - int mi_row, TOKENEXTRA **tp) { - VP9_COMMON *const cm = &cpi->common; - int mi_col; - - cpi->sf.always_this_block_size = BLOCK_8X8; - - // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); - - // Code each SB in the row - for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; - mi_col += MI_BLOCK_SIZE) { - int dummy_rate; - int64_t dummy_dist; - const int idx_str = cm->mode_info_stride * mi_row + mi_col; - MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; - - vp9_zero(cpi->mb.pred_mv); - - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); - set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col); - pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1); - } -} - static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; @@ -2048,7 +2068,7 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) - get_block_context(x, i)->pred_filter_type = SWITCHABLE; + get_block_context(x, i)->pred_interp_filter = SWITCHABLE; } vp9_zero(cpi->mb.pred_mv); @@ -2136,7 +2156,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_zero(cm->counts.single_ref); vp9_zero(cm->counts.comp_ref); vp9_zero(cm->counts.tx); - vp9_zero(cm->counts.mbskip); + vp9_zero(cm->counts.skip); // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. @@ -2250,11 +2270,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += 8) -#if 1 encode_sb_row(cpi, &tile, mi_row, &tp); -#else - encode_sb_row_rt(cpi, &tile, mi_row, &tp); -#endif cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); @@ -2395,15 +2411,15 @@ static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) { } } -static int get_frame_type(VP9_COMP *cpi) { +static MV_REFERENCE_FRAME get_frame_type(VP9_COMP *cpi) { if (frame_is_intra_only(&cpi->common)) - return 0; + return INTRA_FRAME; else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) - return 3; + return ALTREF_FRAME; else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) - return 1; + return LAST_FRAME; else - return 2; + return GOLDEN_FRAME; } static void select_tx_mode(VP9_COMP *cpi) { @@ -2433,6 +2449,264 @@ static void select_tx_mode(VP9_COMP *cpi) { } } } +// Start RTC Exploration +typedef enum { + BOTH_ZERO = 0, + ZERO_PLUS_PREDICTED = 1, + BOTH_PREDICTED = 2, + NEW_PLUS_NON_INTRA = 3, + BOTH_NEW = 4, + INTRA_PLUS_NON_INTRA = 5, + BOTH_INTRA = 6, + INVALID_CASE = 9 +} motion_vector_context; + +static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, + MB_PREDICTION_MODE mode, int mi_row, int mi_col) { + mbmi->interp_filter = EIGHTTAP; + mbmi->mode = mode; + mbmi->mv[0].as_int = 0; + mbmi->mv[1].as_int = 0; + if (mode < NEARESTMV) { + mbmi->ref_frame[0] = INTRA_FRAME; + } else { + mbmi->ref_frame[0] = LAST_FRAME; + } + + mbmi->ref_frame[1] = INTRA_FRAME; + mbmi->tx_size = max_txsize_lookup[bsize]; + mbmi->uv_mode = mode; + mbmi->skip_coeff = 0; + mbmi->sb_type = bsize; + mbmi->segment_id = 0; +} +static INLINE int get_block_row(int b32i, int b16i, int b8i) { + return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1); +} +static INLINE int get_block_col(int b32i, int b16i, int b8i) { + return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1); +} +static void rtc_use_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + TOKENEXTRA **tp, int mi_row, int mi_col, + BLOCK_SIZE bsize, int *rate, int64_t *dist, + int do_recon) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int mis = cm->mode_info_stride; + int mi_width = num_8x8_blocks_wide_lookup[cpi->sf.always_this_block_size]; + int mi_height = num_8x8_blocks_high_lookup[cpi->sf.always_this_block_size]; + int i, j; + int chosen_rate = INT_MAX; + int64_t chosen_dist = INT_MAX; + MB_PREDICTION_MODE mode = DC_PRED; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; + int b32i; + x->fast_ms = 0; + x->subblock_ref = 0; + for (b32i = 0; b32i < 4; b32i++) { + int b16i; + for (b16i = 0; b16i < 4; b16i++) { + int b8i; + int block_row = get_block_row(b32i, b16i, 0); + int block_col = get_block_col(b32i, b16i, 0); + int index = block_row * mis + block_col; + int rate; + int64_t dist; + + int_mv frame_nearest_mv[MAX_REF_FRAMES]; + int_mv frame_near_mv[MAX_REF_FRAMES]; + struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE]; + + // Find a partition size that fits + bsize = find_partition_size(cpi->sf.always_this_block_size, + (row8x8_remaining - block_row), + (col8x8_remaining - block_col), + &mi_height, &mi_width); + mi_8x8[index] = mi_8x8[0] + index; + + set_mi_row_col(xd, tile, mi_row + block_row, mi_height, + mi_col + block_col, mi_width, cm->mi_rows, cm->mi_cols); + + xd->mi_8x8 = mi_8x8 + index; + + if (cm->frame_type != KEY_FRAME) { + set_offsets(cpi, tile, mi_row + block_row, mi_col + block_col, bsize); + + vp9_pick_inter_mode(cpi, x, tile, + mi_row + block_row, mi_col + block_col, + &rate, &dist, bsize); + } else { + set_mode_info(&mi_8x8[index]->mbmi, bsize, mode, + mi_row + block_row, mi_col + block_col); + vp9_setup_buffer_inter(cpi, x, tile, + LAST_FRAME, cpi->sf.always_this_block_size, + mi_row + block_row, mi_col + block_col, + frame_nearest_mv, frame_near_mv, yv12_mb); + } + + for (j = 0; j < mi_height; j++) + for (i = 0; i < mi_width; i++) + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > i + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > j) { + mi_8x8[index+ i + j * mis] = mi_8x8[index]; + } + + for (b8i = 0; b8i < 4; b8i++) { + } + } + } + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64); + + *rate = chosen_rate; + *dist = chosen_dist; +} + +static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, TOKENEXTRA **tp) { + VP9_COMMON * const cm = &cpi->common; + int mi_col; + + // Initialize the left context for the new SB row + vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); + vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + + // Code each SB in the row + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; + mi_col += MI_BLOCK_SIZE) { + int dummy_rate; + int64_t dummy_dist; + + const int idx_str = cm->mode_info_stride * mi_row + mi_col; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; + + cpi->mb.source_variance = UINT_MAX; + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col); + rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1); + } +} + + +static void encode_rtc_frame_internal(VP9_COMP *cpi) { + int mi_row; + MACROBLOCK * const x = &cpi->mb; + VP9_COMMON * const cm = &cpi->common; + MACROBLOCKD * const xd = &x->e_mbd; + +// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", +// cpi->common.current_video_frame, cpi->common.show_frame, +// cm->frame_type); + +// debug output +#if DBG_PRNT_SEGMAP + { + FILE *statsfile; + statsfile = fopen("segmap2.stt", "a"); + fprintf(statsfile, "\n"); + fclose(statsfile); + } +#endif + + vp9_zero(cm->counts.switchable_interp); + vp9_zero(cpi->tx_stepdown_count); + + xd->mi_8x8 = cm->mi_grid_visible; + // required for vp9_frame_init_quantizer + xd->mi_8x8[0] = cm->mi; + + xd->last_mi = cm->prev_mi; + + vp9_zero(cpi->common.counts.mv); + vp9_zero(cpi->coef_counts); + vp9_zero(cm->counts.eob_branch); + + cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 + && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; + switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); + + vp9_frame_init_quantizer(cpi); + + vp9_initialize_rd_consts(cpi); + vp9_initialize_me_consts(cpi, cm->base_qindex); + switch_tx_mode(cpi); + cpi->sf.always_this_block_size = BLOCK_16X16; + + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { + // Initialize encode frame context. + init_encode_frame_mb_context(cpi); + + // Build a frame level activity map + build_activity_map(cpi); + } + + // Re-initialize encode frame context. + init_encode_frame_mb_context(cpi); + + vp9_zero(cpi->rd_comp_pred_diff); + vp9_zero(cpi->rd_filter_diff); + vp9_zero(cpi->rd_tx_select_diff); + vp9_zero(cpi->rd_tx_select_threshes); + + set_prev_mi(cm); + + { + struct vpx_usec_timer emr_timer; + vpx_usec_timer_start(&emr_timer); + + { + // Take tiles into account and give start/end MB + int tile_col, tile_row; + TOKENEXTRA *tp = cpi->tok; + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + + for (tile_row = 0; tile_row < tile_rows; tile_row++) { + for (tile_col = 0; tile_col < tile_cols; tile_col++) { + TileInfo tile; + TOKENEXTRA *tp_old = tp; + + // For each row of SBs in the frame + vp9_tile_init(&tile, cm, tile_row, tile_col); + for (mi_row = tile.mi_row_start; + mi_row < tile.mi_row_end; mi_row += 8) + encode_rtc_sb_row(cpi, &tile, mi_row, &tp); + + cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); + assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); + } + } + } + + vpx_usec_timer_mark(&emr_timer); + cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); + } + + if (cpi->sf.skip_encode_sb) { + int j; + unsigned int intra_count = 0, inter_count = 0; + for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { + intra_count += cm->counts.intra_inter[j][0]; + inter_count += cm->counts.intra_inter[j][1]; + } + cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count); + cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME); + cpi->sf.skip_encode_frame &= cm->show_frame; + } else { + cpi->sf.skip_encode_frame = 0; + } + +#if 0 + // Keep record of the total distortion this time around for future use + cpi->last_frame_distortion = cpi->frame_distortion; +#endif +} +// end RTC play code + void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -2460,7 +2734,6 @@ void vp9_encode_frame(VP9_COMP *cpi) { if (cpi->sf.RD) { int i; REFERENCE_MODE reference_mode; - INTERPOLATION_TYPE filter_type; /* * This code does a single RD pass over the whole frame assuming * either compound, single or hybrid prediction as per whatever has @@ -2470,7 +2743,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { * that for subsequent frames. * It does the same analysis for transform size selection also. */ - const int frame_type = get_frame_type(cpi); + const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type]; const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type]; @@ -2488,22 +2761,18 @@ void vp9_encode_frame(VP9_COMP *cpi) { else reference_mode = REFERENCE_MODE_SELECT; - /* filter type selection */ - // FIXME(rbultje) for some odd reason, we often select smooth_filter - // as default filter for ARF overlay frames. This is a REALLY BAD - // IDEA so we explicitly disable it here. - if (frame_type != 3 && - filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] && - filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] && - filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) { - filter_type = EIGHTTAP_SMOOTH; - } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] && - filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) { - filter_type = EIGHTTAP_SHARP; - } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) { - filter_type = EIGHTTAP; - } else { - filter_type = SWITCHABLE; + if (cm->interp_filter == SWITCHABLE) { + if (frame_type != ALTREF_FRAME && + filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] && + filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] && + filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) { + cm->interp_filter = EIGHTTAP_SMOOTH; + } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] && + filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) { + cm->interp_filter = EIGHTTAP_SHARP; + } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) { + cm->interp_filter = EIGHTTAP; + } } cpi->mb.e_mbd.lossless = cpi->oxcf.lossless; @@ -2511,8 +2780,11 @@ void vp9_encode_frame(VP9_COMP *cpi) { /* transform size selection (4x4, 8x8, 16x16 or select-per-mb) */ select_tx_mode(cpi); cm->reference_mode = reference_mode; - cm->mcomp_filter_type = filter_type; - encode_frame_internal(cpi); + + if (cpi->sf.super_fast_rtc) + encode_rtc_frame_internal(cpi); + else + encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs); @@ -2590,7 +2862,12 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } } else { - encode_frame_internal(cpi); + // Force the usage of the BILINEAR interp_filter. + cm->interp_filter = BILINEAR; + if (cpi->sf.super_fast_rtc) + encode_rtc_frame_internal(cpi); + else + encode_frame_internal(cpi); } } @@ -2666,7 +2943,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 && - (cpi->oxcf.aq_mode != COMPLEXITY_AQ); + (cpi->oxcf.aq_mode != COMPLEXITY_AQ) && + !cpi->sf.super_fast_rtc; x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; @@ -2681,7 +2959,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, vp9_update_zbin_extra(cpi, x); } } else { - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Adjust the zbin based on this MB rate. @@ -2721,7 +3000,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, } else { mbmi->skip_coeff = 1; if (output_enabled) - cm->counts.mbskip[vp9_get_skip_context(xd)][1]++; + cm->counts.skip[vp9_get_skip_context(xd)][1]++; reset_skip_context(xd, MAX(bsize, BLOCK_8X8)); } diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h index 3e9f5381c..f7d17c301 100644 --- a/vp9/encoder/vp9_encodeframe.h +++ b/vp9/encoder/vp9_encodeframe.h @@ -12,6 +12,10 @@ #ifndef VP9_ENCODER_VP9_ENCODEFRAME_H_ #define VP9_ENCODER_VP9_ENCODEFRAME_H_ +#ifdef __cplusplus +extern "C" { +#endif + struct macroblock; struct yv12_buffer_config; @@ -19,4 +23,8 @@ void vp9_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mi_row, int mi_col); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_ENCODEFRAME_H_ diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 4bef67501..8ff23c79a 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -25,26 +25,6 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_tokenize.h" -void vp9_setup_interp_filters(MACROBLOCKD *xd, - INTERPOLATION_TYPE mcomp_filter_type, - VP9_COMMON *cm) { - if (xd->mi_8x8 && xd->mi_8x8[0]) { - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - - set_scale_factors(cm, xd, mbmi->ref_frame[0] - LAST_FRAME, - mbmi->ref_frame[1] - LAST_FRAME); - - } else { - set_scale_factors(cm, xd, -1, -1); - } - - xd->subpix.filter_x = xd->subpix.filter_y = - vp9_get_filter_kernel(mcomp_filter_type == SWITCHABLE ? - EIGHTTAP : mcomp_filter_type); - - assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0); -} - void vp9_subtract_block_c(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, @@ -358,7 +338,6 @@ static void optimize_init_b(int plane, BLOCK_SIZE bsize, pd->above_context, pd->left_context, num_4x4_w, num_4x4_h); } - void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args* const args = arg; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 207d573a5..9f6c9f069 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -16,6 +16,10 @@ #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/common/vp9_onyxc_int.h" +#ifdef __cplusplus +extern "C" { +#endif + struct optimize_ctx { ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; @@ -44,7 +48,9 @@ void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize); int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred); -void vp9_setup_interp_filters(MACROBLOCKD *xd, - INTERPOLATION_TYPE mcomp_filter_type, - VP9_COMMON *cm); + +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_ENCODEMB_H_ diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h index 761278fd1..c57b01db4 100644 --- a/vp9/encoder/vp9_encodemv.h +++ b/vp9/encoder/vp9_encodemv.h @@ -14,6 +14,10 @@ #include "vp9/encoder/vp9_onyx_int.h" +#ifdef __cplusplus +extern "C" { +#endif + void vp9_entropy_mv_init(); void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer* const); @@ -30,4 +34,8 @@ void vp9_build_nmv_cost_table(int *mvjoint, void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_ENCODEMV_H_ diff --git a/vp9/encoder/vp9_extend.h b/vp9/encoder/vp9_extend.h index 9b95ee437..058fe09cf 100644 --- a/vp9/encoder/vp9_extend.h +++ b/vp9/encoder/vp9_extend.h @@ -14,6 +14,10 @@ #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" +#ifdef __cplusplus +extern "C" { +#endif + void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst); @@ -22,4 +26,8 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int srcy, int srcx, int srch, int srcw); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_EXTEND_H_ diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 56872682a..a03cbdd86 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -49,14 +49,15 @@ #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001) +#define MIN_BOOST 300 +#define KEY_FRAME_BOOST 2000 + static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { YV12_BUFFER_CONFIG temp = *a; *a = *b; *b = temp; } -static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame); - static int select_cq_level(int qindex) { int ret_val = QINDEX_RANGE - 1; int i; @@ -369,14 +370,11 @@ static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { } } -static unsigned int zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, - YV12_BUFFER_CONFIG *recon_buffer, - int recon_yoffset) { - MACROBLOCKD *const xd = &x->e_mbd; +static unsigned int zz_motion_search(const VP9_COMP *cpi, const MACROBLOCK *x) { + const MACROBLOCKD *const xd = &x->e_mbd; const uint8_t *const src = x->plane[0].src.buf; const int src_stride = x->plane[0].src.stride; - const uint8_t *const ref = xd->plane[0].pre[0].buf - = recon_buffer->y_buffer + recon_yoffset; + const uint8_t *const ref = xd->plane[0].pre[0].buf; const int ref_stride = xd->plane[0].pre[0].stride; unsigned int sse; @@ -387,8 +385,7 @@ static unsigned int zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const MV *ref_mv, MV *best_mv, - YV12_BUFFER_CONFIG *recon_buffer, - int *best_motion_err, int recon_yoffset) { + int *best_motion_err) { MACROBLOCKD *const xd = &x->e_mbd; MV tmp_mv = {0, 0}; MV ref_mv_full = {ref_mv->row >> 3, ref_mv->col >> 3}; @@ -411,9 +408,6 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // override the default variance function to use MSE v_fn_ptr.vf = get_block_variance_fn(bsize); - // Set up pointers for this macro block recon buffer - xd->plane[0].pre[0].buf = recon_buffer->y_buffer + recon_yoffset; - // Initial step/diamond search centred on best mv tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv, step_param, @@ -456,6 +450,16 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } } +static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) { + if (2 * mb_col + 1 < cm->mi_cols) { + return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_16X16 + : BLOCK_16X8; + } else { + return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_8X16 + : BLOCK_8X8; + } +} + void vp9_first_pass(VP9_COMP *cpi) { int mb_row, mb_col; MACROBLOCK *const x = &cpi->mb; @@ -480,7 +484,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int sum_mvr = 0, sum_mvc = 0; int sum_mvr_abs = 0, sum_mvc_abs = 0; - int sum_mvrs = 0, sum_mvcs = 0; + int64_t sum_mvrs = 0, sum_mvcs = 0; int mvcount = 0; int intercount = 0; int second_ref_count = 0; @@ -490,10 +494,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int sum_in_vectors = 0; uint32_t lastmv_as_int = 0; struct twopass_rc *const twopass = &cpi->twopass; - - int_mv zero_ref_mv; - - zero_ref_mv.as_int = 0; + const MV zero_mv = {0, 0}; vp9_clear_system_state(); // __asm emms; @@ -502,8 +503,7 @@ void vp9_first_pass(VP9_COMP *cpi) { setup_dst_planes(xd, new_yv12, 0, 0); xd->mi_8x8 = cm->mi_grid_visible; - // required for vp9_frame_init_quantizer - xd->mi_8x8[0] = cm->mi; + xd->mi_8x8[0] = cm->mi; // required for vp9_frame_init_quantizer setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); @@ -517,14 +517,8 @@ void vp9_first_pass(VP9_COMP *cpi) { } x->skip_recode = 0; - - // Initialise the MV cost table to the defaults - // if( cm->current_video_frame == 0) - // if ( 0 ) - { - vp9_init_mv_probs(cm); - vp9_initialize_rd_consts(cpi); - } + vp9_init_mv_probs(cm); + vp9_initialize_rd_consts(cpi); // tiling is ignored in the first pass vp9_tile_init(&tile, cm, 0, 0); @@ -549,9 +543,9 @@ void vp9_first_pass(VP9_COMP *cpi) { // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { int this_error; - int gf_motion_error = INT_MAX; - int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); + const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); double error_weight = 1.0; + const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col); vp9_clear_system_state(); // __asm emms; @@ -559,30 +553,15 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); - - if (mb_col * 2 + 1 < cm->mi_cols) { - if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X16; - } else { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X8; - } - } else { - if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X16; - } else { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X8; - } - } + xd->mi_8x8[0]->mbmi.sb_type = bsize; xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, - mb_row << 1, - num_8x8_blocks_high_lookup[xd->mi_8x8[0]->mbmi.sb_type], - mb_col << 1, - num_8x8_blocks_wide_lookup[xd->mi_8x8[0]->mbmi.sb_type], + mb_row << 1, num_8x8_blocks_high_lookup[bsize], + mb_col << 1, num_8x8_blocks_wide_lookup[bsize], cm->mi_rows, cm->mi_cols); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - int energy = vp9_block_energy(cpi, x, xd->mi_8x8[0]->mbmi.sb_type); + const int energy = vp9_block_energy(cpi, x, bsize); error_weight = vp9_vaq_inv_q_ratio(energy); } @@ -608,21 +587,22 @@ void vp9_first_pass(VP9_COMP *cpi) { // Set up limit values for motion vectors to prevent them extending // outside the UMV borders. x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) - + BORDER_MV_PIXELS_B16; + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16; // Other than for the first frame do a motion search if (cm->current_video_frame > 0) { - int tmp_err; - int motion_error = zz_motion_search(cpi, x, lst_yv12, recon_yoffset); + int tmp_err, motion_error; int_mv mv, tmp_mv; + + xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; + motion_error = zz_motion_search(cpi, x); // Simple 0,0 motion with no mv overhead mv.as_int = tmp_mv.as_int = 0; // Test last reference frame using the previous best mv as the // starting point (best reference) for the search first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv, - lst_yv12, &motion_error, recon_yoffset); + &motion_error); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); // __asm emms; motion_error *= error_weight; @@ -632,8 +612,8 @@ void vp9_first_pass(VP9_COMP *cpi) { // based search as well. if (best_ref_mv.as_int) { tmp_err = INT_MAX; - first_pass_motion_search(cpi, x, &zero_ref_mv.as_mv, &tmp_mv.as_mv, - lst_yv12, &tmp_err, recon_yoffset); + first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, + &tmp_err); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); // __asm emms; tmp_err *= error_weight; @@ -648,19 +628,20 @@ void vp9_first_pass(VP9_COMP *cpi) { // Experimental search in an older reference frame if (cm->current_video_frame > 1) { // Simple 0,0 motion with no mv overhead - gf_motion_error = zz_motion_search(cpi, x, gld_yv12, recon_yoffset); + int gf_motion_error; + + xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset; + gf_motion_error = zz_motion_search(cpi, x); - first_pass_motion_search(cpi, x, &zero_ref_mv.as_mv, &tmp_mv.as_mv, - gld_yv12, &gf_motion_error, recon_yoffset); + first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, + &gf_motion_error); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); // __asm emms; gf_motion_error *= error_weight; } - if ((gf_motion_error < motion_error) && - (gf_motion_error < this_error)) { + if (gf_motion_error < motion_error && gf_motion_error < this_error) second_ref_count++; - } // Reset to last frame as reference buffer xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; @@ -697,9 +678,8 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE; - vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, - xd->mi_8x8[0]->mbmi.sb_type); - vp9_encode_sby(x, xd->mi_8x8[0]->mbmi.sb_type); + vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); + vp9_encode_sby(x, bsize); sum_mvr += mv.as_mv.row; sum_mvr_abs += abs(mv.as_mv.row); sum_mvc += mv.as_mv.col; @@ -789,13 +769,11 @@ void vp9_first_pass(VP9_COMP *cpi) { fps.mvr_abs = (double)sum_mvr_abs / mvcount; fps.MVc = (double)sum_mvc / mvcount; fps.mvc_abs = (double)sum_mvc_abs / mvcount; - fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / mvcount)) / - mvcount; - fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) / - mvcount; + fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / mvcount)) / mvcount; + fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) / mvcount; fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2); fps.new_mv_count = new_mv_count; - fps.pcnt_motion = (double)mvcount / cpi->common.MBs; + fps.pcnt_motion = (double)mvcount / cm->MBs; } else { fps.MVr = 0.0; fps.mvr_abs = 0.0; @@ -923,11 +901,10 @@ static double calc_correction_factor(double err_per_mb, return fclamp(pow(error_term, power_term), 0.05, 5.0); } -static int estimate_max_q(VP9_COMP *cpi, - FIRSTPASS_STATS *fpstats, +static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh) { int q; - int num_mbs = cpi->common.MBs; + const int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; RATE_CONTROL *const rc = &cpi->rc; @@ -953,9 +930,8 @@ static int estimate_max_q(VP9_COMP *cpi, } // Restriction on active max q for constrained quality mode. - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && - q < cpi->cq_target_quality) - q = cpi->cq_target_quality; + if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) + q = MAX(q, cpi->cq_target_quality); return q; } @@ -1018,6 +994,7 @@ void vp9_init_second_pass(VP9_COMP *cpi) { FIRSTPASS_STATS this_frame; FIRSTPASS_STATS *start_pos; struct twopass_rc *const twopass = &cpi->twopass; + const VP9_CONFIG *const oxcf = &cpi->oxcf; zero_stats(&twopass->total_stats); zero_stats(&twopass->total_left_stats); @@ -1036,9 +1013,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) { vp9_new_framerate(cpi, 10000000.0 * twopass->total_stats.count / twopass->total_stats.duration); - cpi->output_framerate = cpi->oxcf.framerate; + cpi->output_framerate = oxcf->framerate; twopass->bits_left = (int64_t)(twopass->total_stats.duration * - cpi->oxcf.target_bandwidth / 10000000.0); + oxcf->target_bandwidth / 10000000.0); // Calculate a minimum intra value to be used in determining the IIratio // scores used in the second pass. We have this minimum to make sure @@ -1054,15 +1031,12 @@ void vp9_init_second_pass(VP9_COMP *cpi) { // ratio for the sequence. { double sum_iiratio = 0.0; - double IIRatio; - start_pos = twopass->stats_in; // Note the starting "file" position. while (input_stats(twopass, &this_frame) != EOF) { - IIRatio = this_frame.intra_error - / DOUBLE_DIVIDE_CHECK(this_frame.coded_error); - IIRatio = (IIRatio < 1.0) ? 1.0 : (IIRatio > 20.0) ? 20.0 : IIRatio; - sum_iiratio += IIRatio; + const double iiratio = this_frame.intra_error / + DOUBLE_DIVIDE_CHECK(this_frame.coded_error); + sum_iiratio += fclamp(iiratio, 1.0, 20.0); } twopass->avg_iiratio = sum_iiratio / @@ -1082,9 +1056,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) { twopass->modified_error_total = 0.0; twopass->modified_error_min = - (av_error * cpi->oxcf.two_pass_vbrmin_section) / 100; + (av_error * oxcf->two_pass_vbrmin_section) / 100; twopass->modified_error_max = - (av_error * cpi->oxcf.two_pass_vbrmax_section) / 100; + (av_error * oxcf->two_pass_vbrmax_section) / 100; while (input_stats(twopass, &this_frame) != EOF) { twopass->modified_error_total += @@ -1101,12 +1075,12 @@ void vp9_end_second_pass(VP9_COMP *cpi) { // This function gives and estimate of how badly we believe // the prediction quality is decaying from frame to frame. -static double get_prediction_decay_rate(VP9_COMP *cpi, - FIRSTPASS_STATS *next_frame) { +static double get_prediction_decay_rate(const VP9_COMMON *cm, + const FIRSTPASS_STATS *next_frame) { // Look at the observed drop in prediction quality between the last frame // and the GF buffer (which contains an older frame). const double mb_sr_err_diff = (next_frame->sr_coded_error - - next_frame->coded_error) / cpi->common.MBs; + next_frame->coded_error) / cm->MBs; const double second_ref_decay = mb_sr_err_diff <= 512.0 ? fclamp(pow(1.0 - (mb_sr_err_diff / 512.0), 0.5), 0.85, 1.0) : 0.85; @@ -1134,7 +1108,6 @@ static int detect_transition_to_still( int j; FIRSTPASS_STATS *position = cpi->twopass.stats_in; FIRSTPASS_STATS tmp_next_frame; - double zz_inter; // Look ahead a few frames to see if static condition // persists... @@ -1142,11 +1115,10 @@ static int detect_transition_to_still( if (EOF == input_stats(&cpi->twopass, &tmp_next_frame)) break; - zz_inter = (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion); - if (zz_inter < 0.999) + if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999) break; } - // Reset file position + reset_fpf_position(&cpi->twopass, position); // Only if it does do we signal a transition to still @@ -1160,14 +1132,14 @@ static int detect_transition_to_still( // This function detects a flash through the high relative pcnt_second_ref // score in the frame following a flash frame. The offset passed in should // reflect this -static int detect_flash(VP9_COMP *cpi, int offset) { +static int detect_flash(const struct twopass_rc *twopass, int offset) { FIRSTPASS_STATS next_frame; int flash_detected = 0; // Read the frame data. // The return is FALSE (no flash detected) if not a valid frame - if (read_frame_stats(&cpi->twopass, &next_frame, offset) != EOF) { + if (read_frame_stats(twopass, &next_frame, offset) != EOF) { // What we are looking for here is a situation where there is a // brief break in prediction (such as a flash) but subsequent frames // are reasonably well predicted by an earlier (pre flash) frame. @@ -1188,9 +1160,6 @@ static void accumulate_frame_motion_stats( double *mv_in_out_accumulator, double *abs_mv_in_out_accumulator, double *mv_ratio_accumulator) { - // double this_frame_mv_in_out; - double this_frame_mvr_ratio; - double this_frame_mvc_ratio; double motion_pct; // Accumulate motion stats. @@ -1199,35 +1168,30 @@ static void accumulate_frame_motion_stats( // Accumulate Motion In/Out of frame stats *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct; *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct; - *abs_mv_in_out_accumulator += - fabs(this_frame->mv_in_out_count * motion_pct); + *abs_mv_in_out_accumulator += fabs(this_frame->mv_in_out_count * motion_pct); // Accumulate a measure of how uniform (or conversely how random) // the motion field is. (A ratio of absmv / mv) if (motion_pct > 0.05) { - this_frame_mvr_ratio = fabs(this_frame->mvr_abs) / + const double this_frame_mvr_ratio = fabs(this_frame->mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr)); - this_frame_mvc_ratio = fabs(this_frame->mvc_abs) / + const double this_frame_mvc_ratio = fabs(this_frame->mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVc)); - *mv_ratio_accumulator += - (this_frame_mvr_ratio < this_frame->mvr_abs) + *mv_ratio_accumulator += (this_frame_mvr_ratio < this_frame->mvr_abs) ? (this_frame_mvr_ratio * motion_pct) : this_frame->mvr_abs * motion_pct; - *mv_ratio_accumulator += - (this_frame_mvc_ratio < this_frame->mvc_abs) + *mv_ratio_accumulator += (this_frame_mvc_ratio < this_frame->mvc_abs) ? (this_frame_mvc_ratio * motion_pct) : this_frame->mvc_abs * motion_pct; } } // Calculate a baseline boost number for the current frame. -static double calc_frame_boost( - VP9_COMP *cpi, - FIRSTPASS_STATS *this_frame, - double this_frame_mv_in_out) { +static double calc_frame_boost(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame, + double this_frame_mv_in_out) { double frame_boost; // Underlying boost factor is based on inter intra error ratio @@ -1248,18 +1212,14 @@ static double calc_frame_boost( else frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); - // Clip to maximum - if (frame_boost > GF_RMAX) - frame_boost = GF_RMAX; - - return frame_boost; + return MIN(frame_boost, GF_RMAX); } static int calc_arf_boost(VP9_COMP *cpi, int offset, int f_frames, int b_frames, int *f_boost, int *b_boost) { FIRSTPASS_STATS this_frame; - + struct twopass_rc *const twopass = &cpi->twopass; int i; double boost_score = 0.0; double mv_ratio_accumulator = 0.0; @@ -1272,7 +1232,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, // Search forward from the proposed arf/next gf position for (i = 0; i < f_frames; i++) { - if (read_frame_stats(&cpi->twopass, &this_frame, (i + offset)) == EOF) + if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF) break; // Update the motion related elements to the boost calculation @@ -1283,12 +1243,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, // We want to discount the flash frame itself and the recovery // frame that follows as both will have poor scores. - flash_detected = detect_flash(cpi, (i + offset)) || - detect_flash(cpi, (i + offset + 1)); + flash_detected = detect_flash(twopass, i + offset) || + detect_flash(twopass, i + offset + 1); // Cumulative effect of prediction quality decay if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } @@ -1309,7 +1269,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, // Search backward towards last gf position for (i = -1; i >= -b_frames; i--) { - if (read_frame_stats(&cpi->twopass, &this_frame, (i + offset)) == EOF) + if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF) break; // Update the motion related elements to the boost calculation @@ -1320,12 +1280,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, // We want to discount the the flash frame itself and the recovery // frame that follows as both will have poor scores. - flash_detected = detect_flash(cpi, (i + offset)) || - detect_flash(cpi, (i + offset + 1)); + flash_detected = detect_flash(twopass, i + offset) || + detect_flash(twopass, i + offset + 1); // Cumulative effect of prediction quality decay if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } @@ -1485,6 +1445,7 @@ void define_fixed_arf_period(VP9_COMP *cpi) { static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { FIRSTPASS_STATS next_frame = { 0 }; FIRSTPASS_STATS *start_pos; + struct twopass_rc *const twopass = &cpi->twopass; int i; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1505,8 +1466,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double mv_ratio_accumulator_thresh; int max_bits = frame_max_bits(cpi); // Max for a single frame - unsigned int allow_alt_ref = - cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; + unsigned int allow_alt_ref = cpi->oxcf.play_alternate && + cpi->oxcf.lag_in_frames; int f_boost = 0; int b_boost = 0; @@ -1514,11 +1475,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int active_max_gf_interval; RATE_CONTROL *const rc = &cpi->rc; - cpi->twopass.gf_group_bits = 0; + twopass->gf_group_bits = 0; vp9_clear_system_state(); // __asm emms; - start_pos = cpi->twopass.stats_in; + start_pos = twopass->stats_in; // Load stats for the current frame. mod_frame_err = calculate_modified_err(cpi, this_frame); @@ -1549,20 +1510,19 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { active_max_gf_interval = rc->max_gf_interval; i = 0; - while ((i < cpi->twopass.static_scene_max_gf_interval) && - (i < rc->frames_to_key)) { + while (i < twopass->static_scene_max_gf_interval && i < rc->frames_to_key) { i++; // Increment the loop counter // Accumulate error score of frames in this gf group mod_frame_err = calculate_modified_err(cpi, this_frame); gf_group_err += mod_frame_err; - if (EOF == input_stats(&cpi->twopass, &next_frame)) + if (EOF == input_stats(twopass, &next_frame)) break; // Test for the case where there is a brief flash but the prediction // quality back to an earlier frame is then restored. - flash_detected = detect_flash(cpi, 0); + flash_detected = detect_flash(twopass, 0); // Update the motion related elements to the boost calculation accumulate_frame_motion_stats(&next_frame, @@ -1573,14 +1533,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Cumulative effect of prediction quality decay if (!flash_detected) { last_loop_decay_rate = loop_decay_rate; - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); decay_accumulator = decay_accumulator * loop_decay_rate; // Monitor for static sections. if ((next_frame.pcnt_inter - next_frame.pcnt_motion) < zero_motion_accumulator) { - zero_motion_accumulator = - (next_frame.pcnt_inter - next_frame.pcnt_motion); + zero_motion_accumulator = next_frame.pcnt_inter - + next_frame.pcnt_motion; } // Break clause to detect very still sections after motion @@ -1618,14 +1578,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { old_boost_score = boost_score; } - cpi->twopass.gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0); + twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0); // Don't allow a gf too near the next kf if ((rc->frames_to_key - i) < MIN_GF_INTERVAL) { while (i < (rc->frames_to_key + !rc->next_key_frame_forced)) { i++; - if (EOF == input_stats(&cpi->twopass, this_frame)) + if (EOF == input_stats(twopass, this_frame)) break; if (i < rc->frames_to_key) { @@ -1927,186 +1887,6 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) { return 0; } -void vp9_get_svc_params(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if ((cm->current_video_frame == 0) || - (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % - cpi->key_frame_frequency == 0))) { - cm->frame_type = KEY_FRAME; - } else { - cm->frame_type = INTER_FRAME; - } - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; -} - -void vp9_get_one_pass_params(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (!cpi->refresh_alt_ref_frame && - (cm->current_video_frame == 0 || - cm->frame_flags & FRAMEFLAGS_KEY || - cpi->rc.frames_to_key == 0 || - (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { - cm->frame_type = KEY_FRAME; - cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && - cpi->rc.frames_to_key == 0; - cpi->rc.frames_to_key = cpi->key_frame_frequency; - cpi->rc.kf_boost = 300; - } else { - cm->frame_type = INTER_FRAME; - } - if (cpi->rc.frames_till_gf_update_due == 0) { - cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval; - cpi->refresh_golden_frame = 1; - } -} - -void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if ((cm->current_video_frame == 0 || - cm->frame_flags & FRAMEFLAGS_KEY || - cpi->rc.frames_to_key == 0 || - (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { - cm->frame_type = KEY_FRAME; - cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && - cpi->rc.frames_to_key == 0; - cpi->rc.frames_to_key = cpi->key_frame_frequency; - cpi->rc.kf_boost = 300; - } else { - cm->frame_type = INTER_FRAME; - } - // Don't use gf_update by default in CBR mode. - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; -} - -void vp9_get_first_pass_params(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (!cpi->refresh_alt_ref_frame && - (cm->current_video_frame == 0 || - cm->frame_flags & FRAMEFLAGS_KEY)) { - cm->frame_type = KEY_FRAME; - } else { - cm->frame_type = INTER_FRAME; - } - // Do not use periodic key frames - cpi->rc.frames_to_key = INT_MAX; -} - -void vp9_get_second_pass_params(VP9_COMP *cpi) { - int tmp_q; - int frames_left = (int)(cpi->twopass.total_stats.count - - cpi->common.current_video_frame); - - FIRSTPASS_STATS this_frame; - FIRSTPASS_STATS this_frame_copy; - RATE_CONTROL *rc = &cpi->rc; - - double this_frame_intra_error; - double this_frame_coded_error; - - if (cpi->refresh_alt_ref_frame) { - cpi->common.frame_type = INTER_FRAME; - return; - } - if (!cpi->twopass.stats_in) - return; - - vp9_clear_system_state(); - - if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { - rc->active_worst_quality = cpi->oxcf.cq_level; - } else if (cpi->common.current_video_frame == 0) { - // Special case code for first frame. - int section_target_bandwidth = - (int)(cpi->twopass.bits_left / frames_left); - - tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats, - section_target_bandwidth); - - rc->active_worst_quality = tmp_q; - rc->ni_av_qi = tmp_q; - rc->avg_q = vp9_convert_qindex_to_q(tmp_q); - - // Limit the maxq value returned subsequently. - // This increases the risk of overspend or underspend if the initial - // estimate for the clip is bad, but helps prevent excessive - // variation in Q, especially near the end of a clip - // where for example a small overspend may cause Q to crash - // adjust_maxq_qrange(cpi); - } - vp9_zero(this_frame); - if (EOF == input_stats(&cpi->twopass, &this_frame)) - return; - - this_frame_intra_error = this_frame.intra_error; - this_frame_coded_error = this_frame.coded_error; - - // keyframe and section processing ! - if (rc->frames_to_key == 0 || - (cpi->common.frame_flags & FRAMEFLAGS_KEY)) { - // Define next KF group and assign bits to it - this_frame_copy = this_frame; - find_next_key_frame(cpi, &this_frame_copy); - } else { - cpi->common.frame_type = INTER_FRAME; - } - - // Is this a GF / ARF (Note that a KF is always also a GF) - if (rc->frames_till_gf_update_due == 0) { - // Define next gf group and assign bits to it - this_frame_copy = this_frame; - -#if CONFIG_MULTIPLE_ARF - if (cpi->multi_arf_enabled) { - define_fixed_arf_period(cpi); - } else { -#endif - define_gf_group(cpi, &this_frame_copy); -#if CONFIG_MULTIPLE_ARF - } -#endif - - if (cpi->twopass.gf_zeromotion_pct > 995) { - // As long as max_thresh for encode breakout is small enough, it is ok - // to enable it for no-show frame, i.e. set enable_encode_breakout to 2. - if (!cpi->common.show_frame) - cpi->enable_encode_breakout = 0; - else - cpi->enable_encode_breakout = 2; - } - - rc->frames_till_gf_update_due = rc->baseline_gf_interval; - cpi->refresh_golden_frame = 1; - } else { - // Otherwise this is an ordinary frame - // Assign bits from those allocated to the GF group - this_frame_copy = this_frame; - assign_std_frame_bits(cpi, &this_frame_copy); - } - - // Keep a globally available copy of this and the next frame's iiratio. - cpi->twopass.this_iiratio = (int)(this_frame_intra_error / - DOUBLE_DIVIDE_CHECK(this_frame_coded_error)); - { - FIRSTPASS_STATS next_frame; - if (lookup_next_frame_stats(&cpi->twopass, &next_frame) != EOF) { - cpi->twopass.next_iiratio = (int)(next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); - } - } - - // Set nominal per second bandwidth for this frame - cpi->target_bandwidth = (int)(rc->per_frame_bandwidth * - cpi->output_framerate); - if (cpi->target_bandwidth < 0) - cpi->target_bandwidth = 0; - - // Update the total stats remaining structure - subtract_stats(&cpi->twopass.total_left_stats, &this_frame); -} - static int test_candidate_kf(VP9_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTPASS_STATS *this_frame, @@ -2137,7 +1917,6 @@ static int test_candidate_kf(VP9_COMP *cpi, double boost_score = 0.0; double old_boost_score = 0.0; double decay_accumulator = 1.0; - double next_iiratio; local_next_frame = *next_frame; @@ -2146,8 +1925,8 @@ static int test_candidate_kf(VP9_COMP *cpi, // Examine how well the key frame predicts subsequent frames for (i = 0; i < 16; i++) { - next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)); + double next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / + DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)); if (next_iiratio > RMAX) next_iiratio = RMAX; @@ -2269,7 +2048,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // How fast is prediction quality decaying - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); // We want to know something about the recent past... rather than // as used elsewhere where we are concened with decay in prediction @@ -2403,9 +2182,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { r = RMAX; // How fast is prediction quality decaying - if (!detect_flash(cpi, 0)) { - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - decay_accumulator = decay_accumulator * loop_decay_rate; + if (!detect_flash(twopass, 0)) { + loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); + decay_accumulator *= loop_decay_rate; decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } @@ -2443,8 +2222,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (kf_boost < (rc->frames_to_key * 3)) kf_boost = (rc->frames_to_key * 3); - if (kf_boost < 300) // Min KF boost - kf_boost = 300; + if (kf_boost < MIN_BOOST) + kf_boost = MIN_BOOST; // Make a note of baseline boost and the zero motion // accumulator value for use elsewhere. @@ -2526,6 +2305,199 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { twopass->modified_error_left -= kf_group_err; } +void vp9_get_svc_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if ((cm->current_video_frame == 0) || + (cm->frame_flags & FRAMEFLAGS_KEY) || + (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % + cpi->key_frame_frequency == 0))) { + cm->frame_type = KEY_FRAME; + cpi->rc.source_alt_ref_active = 0; + } else { + cm->frame_type = INTER_FRAME; + } + cpi->rc.frames_till_gf_update_due = INT_MAX; + cpi->rc.baseline_gf_interval = INT_MAX; +} + +// Use this macro to turn on/off use of alt-refs in one-pass mode. +#define USE_ALTREF_FOR_ONE_PASS 1 + +void vp9_get_one_pass_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if (!cpi->refresh_alt_ref_frame && + (cm->current_video_frame == 0 || + cm->frame_flags & FRAMEFLAGS_KEY || + cpi->rc.frames_to_key == 0 || + (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { + cm->frame_type = KEY_FRAME; + cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && + cpi->rc.frames_to_key == 0; + cpi->rc.frames_to_key = cpi->key_frame_frequency; + cpi->rc.kf_boost = KEY_FRAME_BOOST; + cpi->rc.source_alt_ref_active = 0; + } else { + cm->frame_type = INTER_FRAME; + } + if (cpi->rc.frames_till_gf_update_due == 0) { + cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; + cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval; + // NOTE: frames_till_gf_update_due must be <= frames_to_key. + if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key) + cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key; + cpi->refresh_golden_frame = 1; + cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS; + cpi->rc.gfu_boost = 1000; + } +} + +void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if ((cm->current_video_frame == 0 || + cm->frame_flags & FRAMEFLAGS_KEY || + cpi->rc.frames_to_key == 0 || + (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { + cm->frame_type = KEY_FRAME; + cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && + cpi->rc.frames_to_key == 0; + cpi->rc.frames_to_key = cpi->key_frame_frequency; + cpi->rc.kf_boost = KEY_FRAME_BOOST; + cpi->rc.source_alt_ref_active = 0; + } else { + cm->frame_type = INTER_FRAME; + } + // Don't use gf_update by default in CBR mode. + cpi->rc.frames_till_gf_update_due = INT_MAX; + cpi->rc.baseline_gf_interval = INT_MAX; +} + +void vp9_get_first_pass_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if (!cpi->refresh_alt_ref_frame && + (cm->current_video_frame == 0 || + cm->frame_flags & FRAMEFLAGS_KEY)) { + cm->frame_type = KEY_FRAME; + } else { + cm->frame_type = INTER_FRAME; + } + // Do not use periodic key frames + cpi->rc.frames_to_key = INT_MAX; +} + +void vp9_get_second_pass_params(VP9_COMP *cpi) { + int tmp_q; + int frames_left = (int)(cpi->twopass.total_stats.count - + cpi->common.current_video_frame); + + FIRSTPASS_STATS this_frame; + FIRSTPASS_STATS this_frame_copy; + RATE_CONTROL *rc = &cpi->rc; + + double this_frame_intra_error; + double this_frame_coded_error; + + if (!cpi->twopass.stats_in) + return; + if (cpi->refresh_alt_ref_frame) { + cpi->common.frame_type = INTER_FRAME; + rc->per_frame_bandwidth = cpi->twopass.gf_bits; + return; + } + + vp9_clear_system_state(); + + if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { + rc->active_worst_quality = cpi->oxcf.cq_level; + } else if (cpi->common.current_video_frame == 0) { + // Special case code for first frame. + int section_target_bandwidth = + (int)(cpi->twopass.bits_left / frames_left); + + tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats, + section_target_bandwidth); + + rc->active_worst_quality = tmp_q; + rc->ni_av_qi = tmp_q; + rc->avg_q = vp9_convert_qindex_to_q(tmp_q); + + // Limit the maxq value returned subsequently. + // This increases the risk of overspend or underspend if the initial + // estimate for the clip is bad, but helps prevent excessive + // variation in Q, especially near the end of a clip + // where for example a small overspend may cause Q to crash + // adjust_maxq_qrange(cpi); + } + vp9_zero(this_frame); + if (EOF == input_stats(&cpi->twopass, &this_frame)) + return; + + this_frame_intra_error = this_frame.intra_error; + this_frame_coded_error = this_frame.coded_error; + + // keyframe and section processing ! + if (rc->frames_to_key == 0 || + (cpi->common.frame_flags & FRAMEFLAGS_KEY)) { + // Define next KF group and assign bits to it + this_frame_copy = this_frame; + find_next_key_frame(cpi, &this_frame_copy); + } else { + cpi->common.frame_type = INTER_FRAME; + } + + // Is this a GF / ARF (Note that a KF is always also a GF) + if (rc->frames_till_gf_update_due == 0) { + // Define next gf group and assign bits to it + this_frame_copy = this_frame; + +#if CONFIG_MULTIPLE_ARF + if (cpi->multi_arf_enabled) { + define_fixed_arf_period(cpi); + } else { +#endif + define_gf_group(cpi, &this_frame_copy); +#if CONFIG_MULTIPLE_ARF + } +#endif + + if (cpi->twopass.gf_zeromotion_pct > 995) { + // As long as max_thresh for encode breakout is small enough, it is ok + // to enable it for no-show frame, i.e. set enable_encode_breakout to 2. + if (!cpi->common.show_frame) + cpi->enable_encode_breakout = 0; + else + cpi->enable_encode_breakout = 2; + } + + rc->frames_till_gf_update_due = rc->baseline_gf_interval; + cpi->refresh_golden_frame = 1; + } else { + // Otherwise this is an ordinary frame + // Assign bits from those allocated to the GF group + this_frame_copy = this_frame; + assign_std_frame_bits(cpi, &this_frame_copy); + } + + // Keep a globally available copy of this and the next frame's iiratio. + cpi->twopass.this_iiratio = (int)(this_frame_intra_error / + DOUBLE_DIVIDE_CHECK(this_frame_coded_error)); + { + FIRSTPASS_STATS next_frame; + if (lookup_next_frame_stats(&cpi->twopass, &next_frame) != EOF) { + cpi->twopass.next_iiratio = (int)(next_frame.intra_error / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); + } + } + + // Set nominal per second bandwidth for this frame + cpi->target_bandwidth = (int)(rc->per_frame_bandwidth * + cpi->output_framerate); + if (cpi->target_bandwidth < 0) + cpi->target_bandwidth = 0; + + // Update the total stats remaining structure + subtract_stats(&cpi->twopass.total_left_stats, &this_frame); +} + void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { #ifdef DISABLE_RC_LONG_TERM_MEM cpi->twopass.bits_left -= cpi->rc.this_frame_target; diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index f89e4cb1c..ca5b10080 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -12,6 +12,10 @@ #define VP9_ENCODER_VP9_FIRSTPASS_H_ #include "vp9/encoder/vp9_onyx_int.h" +#ifdef __cplusplus +extern "C" { +#endif + void vp9_init_first_pass(VP9_COMP *cpi); void vp9_first_pass(VP9_COMP *cpi); void vp9_end_first_pass(VP9_COMP *cpi); @@ -25,4 +29,8 @@ void vp9_get_one_pass_params(VP9_COMP *cpi); void vp9_get_one_pass_cbr_params(VP9_COMP *cpi); void vp9_get_svc_params(VP9_COMP *cpi); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_FIRSTPASS_H_ diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index ee73ff15a..e6e59c05a 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -173,7 +173,6 @@ struct lookahead_entry * vp9_lookahead_peek(struct lookahead_ctx *ctx, int index) { struct lookahead_entry *buf = NULL; - assert(index < (int)ctx->max_sz); if (index < (int)ctx->sz) { index += ctx->read_idx; if (index >= (int)ctx->max_sz) diff --git a/vp9/encoder/vp9_lookahead.h b/vp9/encoder/vp9_lookahead.h index c773f8fcc..1c00c462d 100644 --- a/vp9/encoder/vp9_lookahead.h +++ b/vp9/encoder/vp9_lookahead.h @@ -14,6 +14,10 @@ #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" +#ifdef __cplusplus +extern "C" { +#endif + #define MAX_LAG_BUFFERS 25 struct lookahead_entry { @@ -94,4 +98,8 @@ struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx, */ unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_LOOKAHEAD_H_ diff --git a/vp9/encoder/vp9_mbgraph.h b/vp9/encoder/vp9_mbgraph.h index c5bca4d01..79dd2bc95 100644 --- a/vp9/encoder/vp9_mbgraph.h +++ b/vp9/encoder/vp9_mbgraph.h @@ -11,6 +11,14 @@ #ifndef VP9_ENCODER_VP9_MBGRAPH_H_ #define VP9_ENCODER_VP9_MBGRAPH_H_ +#ifdef __cplusplus +extern "C" { +#endif + void vp9_update_mbgraph_stats(VP9_COMP *cpi); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_MBGRAPH_H_ diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index b63fbd56b..4c4ac5dfa 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -184,12 +184,6 @@ static INLINE int sp(int x) { return (x & 7) << 1; } -#define IFMVCV(r, c, s, e) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) \ - s \ - else \ - e; - static INLINE uint8_t *pre(uint8_t *buf, int stride, int r, int c, int offset) { return &buf[(r >> 3) * stride + (c >> 3) - offset]; } @@ -201,17 +195,18 @@ static INLINE uint8_t *pre(uint8_t *buf, int stride, int r, int c, int offset) { /* checks if (r, c) has better score than previous best */ #define CHECK_BETTER(v, r, c) \ - IFMVCV(r, c, { \ - thismse = (DIST(r, c)); \ - if ((v = MVC(r, c) + thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - }, \ - v = INT_MAX;) + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + thismse = (DIST(r, c)); \ + if ((v = MVC(r, c) + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } #define FIRST_LEVEL_CHECKS \ { \ @@ -469,7 +464,6 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, #undef MVC #undef PRE #undef DIST -#undef IFMVCV #undef CHECK_BETTER #undef SP diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index eee28a7ba..c3a8be212 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -15,6 +15,10 @@ #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_variance.h" +#ifdef __cplusplus +extern "C" { +#endif + // The maximum number of steps in a step search given the largest // allowed initial step #define MAX_MVSEARCH_STEPS 11 @@ -129,4 +133,8 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, int *mvjcost, int *mvcost[2], const MV *center_mv, const uint8_t *second_pred, int w, int h); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_MCOMP_H_ diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 27531d232..a9b0718c8 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -93,14 +93,6 @@ FILE *kf_list; FILE *keyfile; #endif - -#ifdef MODE_STATS -extern void init_tx_count_stats(); -extern void write_tx_count_stats(); -extern void init_switchable_interp_stats(); -extern void write_switchable_interp_stats(); -#endif - #ifdef SPEEDSTATS unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; @@ -463,14 +455,17 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) { cache_ptr += cm->mi_cols; } } +static int is_slowest_mode(int mode) { + return (mode == MODE_SECONDPASS_BEST || mode == MODE_BESTQUALITY); +} -static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode) { +static void set_rd_speed_thresholds(VP9_COMP *cpi) { SPEED_FEATURES *sf = &cpi->sf; int i; // Set baseline threshold values for (i = 0; i < MAX_MODES; ++i) - sf->thresh_mult[i] = mode == 0 ? -500 : 0; + sf->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; sf->thresh_mult[THR_NEARESTMV] = 0; sf->thresh_mult[THR_NEARESTG] = 0; @@ -546,12 +541,12 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode) { } } -static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi, int mode) { +static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { SPEED_FEATURES *sf = &cpi->sf; int i; for (i = 0; i < MAX_REFS; ++i) - sf->thresh_mult_sub8x8[i] = mode == 0 ? -500 : 0; + sf->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0; sf->thresh_mult_sub8x8[THR_LAST] += 2500; sf->thresh_mult_sub8x8[THR_GOLD] += 2500; @@ -601,7 +596,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 1; + sf->adaptive_pred_interp_filter = 1; sf->auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; sf->recode_loop = 2; @@ -627,14 +622,14 @@ static void set_good_speed_feature(VP9_COMMON *cm, FLAG_SKIP_INTRA_LOWVAR; sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; + sf->adaptive_pred_interp_filter = 2; sf->reference_masking = 1; sf->auto_mv_step_size = 1; sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = 1; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; @@ -664,14 +659,14 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; + sf->adaptive_pred_interp_filter = 2; sf->reference_masking = 1; sf->auto_mv_step_size = 1; sf->disable_filter_search_var_thresh = 100; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = 1; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; @@ -699,14 +694,14 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; + sf->adaptive_pred_interp_filter = 2; sf->reference_masking = 1; sf->auto_mv_step_size = 1; sf->disable_filter_search_var_thresh = 200; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = 1; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; @@ -772,7 +767,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 1; + sf->adaptive_pred_interp_filter = 1; sf->auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; sf->recode_loop = 2; @@ -798,14 +793,14 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; - sf->adaptive_pred_filter_type = 2; + sf->adaptive_pred_interp_filter = 2; sf->auto_mv_step_size = 1; sf->reference_masking = 1; sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = 1; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; @@ -847,17 +842,24 @@ static void set_rt_speed_feature(VP9_COMMON *cm, if (speed >= 5) { int i; sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->auto_min_max_partition_size = frame_is_intra_only(cm) ? + RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX; + sf->subpel_force_stop = 1; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_DC_H_V; sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; } + sf->use_fast_lpf_pick = 2; + sf->RD = 0; + } + if (speed >= 6) { + sf->super_fast_rtc = 1; } } void vp9_set_speed_features(VP9_COMP *cpi) { SPEED_FEATURES *sf = &cpi->sf; VP9_COMMON *cm = &cpi->common; - int mode = cpi->compressor_speed; int speed = cpi->speed; int i; @@ -874,6 +876,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->recode_loop = 1; sf->subpel_search_method = SUBPEL_TREE; sf->subpel_iters_per_step = 2; + sf->subpel_force_stop = 0; sf->optimize_coefficients = !cpi->oxcf.lossless; sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; @@ -884,12 +887,12 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; sf->adaptive_motion_search = 0; - sf->adaptive_pred_filter_type = 0; + sf->adaptive_pred_interp_filter = 0; sf->reference_masking = 0; sf->use_one_partition_size_always = 0; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; - sf->auto_min_max_partition_size = 0; + sf->auto_min_max_partition_size = NOT_IN_USE; sf->max_partition_size = BLOCK_64X64; sf->min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; @@ -909,23 +912,26 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_fast_coef_updates = 0; sf->using_small_partition_info = 0; sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set + sf->super_fast_rtc = 0; - switch (mode) { - case 0: // This is the best quality mode. + switch (cpi->oxcf.mode) { + case MODE_BESTQUALITY: + case MODE_SECONDPASS_BEST: // This is the best quality mode. cpi->diamond_search_sad = vp9_full_range_search; break; - case 1: + case MODE_FIRSTPASS: + case MODE_GOODQUALITY: + case MODE_SECONDPASS: set_good_speed_feature(cm, sf, speed); break; - break; - case 2: + case MODE_REALTIME: set_rt_speed_feature(cm, sf, speed); break; }; /* switch */ // Set rd thresholds based on mode and speed setting - set_rd_speed_thresholds(cpi, mode); - set_rd_speed_thresholds_sub8x8(cpi, mode); + set_rd_speed_thresholds(cpi); + set_rd_speed_thresholds_sub8x8(cpi); // Slow quant, dct and trellis not worthwhile for first pass // so make sure they are always turned off. @@ -969,7 +975,7 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) { if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer, cpi->oxcf.width, cpi->oxcf.height, cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) + VP9_ENC_BORDER_IN_PIXELS)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate altref buffer"); } @@ -1037,14 +1043,14 @@ static void update_frame_size(VP9_COMP *cpi) { if (vp9_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) + VP9_ENC_BORDER_IN_PIXELS)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to reallocate last frame buffer"); if (vp9_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL)) + VP9_ENC_BORDER_IN_PIXELS)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to reallocate scaled source buffer"); @@ -1246,24 +1252,24 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { // Real time and one pass deprecated in test code base case MODE_GOODQUALITY: cpi->pass = 0; - cpi->compressor_speed = 2; cpi->oxcf.cpu_used = clamp(cpi->oxcf.cpu_used, -5, 5); break; case MODE_FIRSTPASS: cpi->pass = 1; - cpi->compressor_speed = 1; break; case MODE_SECONDPASS: cpi->pass = 2; - cpi->compressor_speed = 1; cpi->oxcf.cpu_used = clamp(cpi->oxcf.cpu_used, -5, 5); break; case MODE_SECONDPASS_BEST: cpi->pass = 2; - cpi->compressor_speed = 0; + break; + + case MODE_REALTIME: + cpi->pass = 0; break; } @@ -1339,7 +1345,7 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->cq_target_quality = cpi->oxcf.cq_level; - cm->mcomp_filter_type = DEFAULT_INTERP_FILTER; + cm->interp_filter = DEFAULT_INTERP_FILTER; cpi->target_bandwidth = cpi->oxcf.target_bandwidth; @@ -1631,11 +1637,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { init_context_counters(); #endif -#ifdef MODE_STATS - init_tx_count_stats(); - init_switchable_interp_stats(); -#endif - /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; cpi->key_frame_frequency = cpi->oxcf.key_freq; @@ -1892,13 +1893,6 @@ void vp9_remove_compressor(VP9_PTR *ptr) { vp9_end_second_pass(cpi); } -#ifdef MODE_STATS - if (cpi->pass != 1) { - write_tx_count_stats(); - write_switchable_interp_stats(); - } -#endif - #if CONFIG_INTERNAL_STATS vp9_clear_system_state(); @@ -2203,7 +2197,7 @@ int vp9_get_reference_enc(VP9_PTR ptr, int index, YV12_BUFFER_CONFIG **fb) { if (index < 0 || index >= REF_FRAMES) return -1; - *fb = &cm->yv12_fb[cm->ref_frame_map[index]]; + *fb = &cm->frame_bufs[cm->ref_frame_map[index]].buf; return 0; } @@ -2495,9 +2489,9 @@ static void update_reference_frames(VP9_COMP * const cpi) { // At this point the new frame has been encoded. // If any buffer copy / swapping is signaled it should be done here. if (cm->frame_type == KEY_FRAME) { - ref_cnt_fb(cm->fb_idx_ref_cnt, + ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); - ref_cnt_fb(cm->fb_idx_ref_cnt, + ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); } #if CONFIG_MULTIPLE_ARF @@ -2518,7 +2512,7 @@ static void update_reference_frames(VP9_COMP * const cpi) { */ int tmp; - ref_cnt_fb(cm->fb_idx_ref_cnt, + ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); tmp = cpi->alt_fb_idx; @@ -2532,18 +2526,18 @@ static void update_reference_frames(VP9_COMP * const cpi) { arf_idx = cpi->arf_buffer_idx[cpi->sequence_number + 1]; } #endif - ref_cnt_fb(cm->fb_idx_ref_cnt, + ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); } if (cpi->refresh_golden_frame) { - ref_cnt_fb(cm->fb_idx_ref_cnt, + ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); } } if (cpi->refresh_last_frame) { - ref_cnt_fb(cm->fb_idx_ref_cnt, + ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); } } @@ -2581,20 +2575,20 @@ static void scale_references(VP9_COMP *cpi) { for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - YV12_BUFFER_CONFIG *ref = &cm->yv12_fb[idx]; + YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf; if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { const int new_fb = get_free_fb(cm); - vp9_realloc_frame_buffer(&cm->yv12_fb[new_fb], + vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL); - scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]); + VP9_ENC_BORDER_IN_PIXELS); + scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf); cpi->scaled_ref_idx[ref_frame - 1] = new_fb; } else { cpi->scaled_ref_idx[ref_frame - 1] = idx; - cm->fb_idx_ref_cnt[idx]++; + cm->frame_bufs[idx].ref_count++; } } } @@ -2604,7 +2598,7 @@ static void release_scaled_references(VP9_COMP *cpi) { int i; for (i = 0; i < 3; i++) - cm->fb_idx_ref_cnt[cpi->scaled_ref_idx[i]]--; + cm->frame_bufs[cpi->scaled_ref_idx[i]].ref_count--; } static void full_to_model_count(unsigned int *model_count, @@ -2749,7 +2743,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (cpi->sf.recode_loop != 0) { vp9_save_coding_context(cpi); cpi->dummy_packing = 1; - vp9_pack_bitstream(cpi, dest, size); + if (!cpi->sf.super_fast_rtc) + vp9_pack_bitstream(cpi, dest, size); + cpi->rc.projected_frame_size = (*size) << 3; vp9_restore_coding_context(cpi); @@ -2972,15 +2968,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Clear down mmx registers to allow floating point in what follows. vp9_clear_system_state(); - // For an alt ref frame in 2 pass we skip the call to the second - // pass function that sets the target bandwidth so we must set it here. - if (cpi->refresh_alt_ref_frame) { - // Set a per frame bit target for the alt ref frame. - cpi->rc.per_frame_bandwidth = cpi->twopass.gf_bits; - // Set a per second target bitrate. - cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * cpi->output_framerate); - } - // Clear zbin over-quant value and mode boost values. cpi->zbin_mode_boost = 0; @@ -3107,13 +3094,24 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - // Decide q and q bounds + // Decide q and q bounds. q = vp9_rc_pick_q_and_adjust_q_bounds(cpi, &bottom_index, &top_index); + // JBB : This is realtime mode. In real time mode the first frame + // should be larger. Q of 0 is disabled because we force tx size to be + // 16x16... + if (cpi->sf.super_fast_rtc) { + if (cpi->common.current_video_frame == 0) + q /= 3; + + if (q == 0) + q++; + } + if (!frame_is_intra_only(cm)) { - cm->mcomp_filter_type = DEFAULT_INTERP_FILTER; + cm->interp_filter = DEFAULT_INTERP_FILTER; /* TODO: Decide this more intelligently */ set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH)); } @@ -3253,7 +3251,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->last_height = cm->height; // reset to normal state now that we are done. - cm->last_show_frame = cm->show_frame; + if (!cm->show_existing_frame) + cm->last_show_frame = cm->show_frame; if (cm->show_frame) { // current mip will be the prev_mip for the next frame MODE_INFO *temp = cm->prev_mip; @@ -3312,7 +3311,6 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, vp9_get_second_pass_params(cpi); encode_frame_to_data_rate(cpi, size, dest, frame_flags); - // vp9_print_modes_and_motion_vectors(&cpi->common, "encode.stt"); vp9_twopass_postencode_update(cpi, *size); } @@ -3334,6 +3332,7 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { VP9_COMP *cpi = (VP9_COMP *) ptr; + VP9_COMMON *cm = &cpi->common; struct vpx_usec_timer timer; int res = 0; const int subsampling_x = sd->uv_width < sd->y_width; @@ -3347,6 +3346,12 @@ int vp9_receive_raw_frame(VP9_PTR ptr, unsigned int frame_flags, vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); + if (cm->version == 0 && (subsampling_x != 1 || subsampling_y != 1)) { + vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, + "Non-4:2:0 color space requires profile >= 1"); + res = -1; + } + return res; } @@ -3414,6 +3419,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, int64_t *time_stamp, int64_t *time_end, int flush) { VP9_COMP *cpi = (VP9_COMP *) ptr; VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &cpi->mb.e_mbd; struct vpx_usec_timer cmptimer; YV12_BUFFER_CONFIG *force_src_buffer = NULL; MV_REFERENCE_FRAME ref_frame; @@ -3461,8 +3467,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->oxcf.arnr_max_frames > 0) { // Produce the filtered ARF frame. // TODO(agrange) merge these two functions. - configure_arnr_filter(cpi, cm->current_video_frame + frames_to_arf, - cpi->rc.gfu_boost); + vp9_configure_arnr_filter(cpi, frames_to_arf, cpi->rc.gfu_boost); vp9_temporal_filter_prepare(cpi, frames_to_arf); vp9_extend_frame_borders(&cpi->alt_ref_buffer, cm->subsampling_x, cm->subsampling_y); @@ -3478,7 +3483,9 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif - cpi->rc.source_alt_ref_pending = 0; // Clear Pending altf Ref flag. + cpi->rc.source_alt_ref_pending = 0; + } else { + cpi->rc.source_alt_ref_pending = 0; } } @@ -3560,7 +3567,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, /* find a free buffer for the new frame, releasing the reference previously * held. */ - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; + cm->frame_bufs[cm->new_fb_idx].ref_count--; cm->new_fb_idx = get_free_fb(cm); #if CONFIG_MULTIPLE_ARF @@ -3580,13 +3587,11 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, vp9_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, - VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL); - + VP9_ENC_BORDER_IN_PIXELS); for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - YV12_BUFFER_CONFIG *const buf = &cm->yv12_fb[idx]; - + YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf; RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1]; ref_buf->buf = buf; ref_buf->idx = idx; @@ -3598,11 +3603,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, vp9_extend_frame_borders(buf, cm->subsampling_x, cm->subsampling_y); } - vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm); + set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); + xd->interp_kernel = vp9_get_interp_kernel( + DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER); - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - vp9_vaq_init(); - } + if (cpi->oxcf.aq_mode == VARIANCE_AQ) + vp9_vaq_init(); if (cpi->use_svc) { SvcEncode(cpi, size, dest, frame_flags); @@ -3872,24 +3878,25 @@ void vp9_set_svc(VP9_PTR comp, int use_svc) { return; } -int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) { +int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *reference) { int i, j; int total = 0; - uint8_t *src = source->y_buffer; - uint8_t *dst = dest->y_buffer; + const uint8_t *src = source->y_buffer; + const uint8_t *ref = reference->y_buffer; // Loop through the Y plane raw and reconstruction data summing // (square differences) for (i = 0; i < source->y_height; i += 16) { for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - total += vp9_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, - &sse); + total += vp9_mse16x16(src + j, source->y_stride, + ref + j, reference->y_stride, &sse); } src += 16 * source->y_stride; - dst += 16 * dest->y_stride; + ref += 16 * reference->y_stride; } return total; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index a665bf859..d928312b6 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -8,25 +8,32 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_ENCODER_VP9_ONYX_INT_H_ #define VP9_ENCODER_VP9_ONYX_INT_H_ #include <stdio.h> + #include "./vpx_config.h" +#include "vpx_ports/mem.h" +#include "vpx/internal/vpx_codec_internal.h" + +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_onyx.h" -#include "vp9/encoder/vp9_treewriter.h" -#include "vp9/encoder/vp9_tokenize.h" #include "vp9/common/vp9_onyxc_int.h" -#include "vp9/encoder/vp9_variance.h" + #include "vp9/encoder/vp9_encodemb.h" -#include "vp9/encoder/vp9_quantize.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vpx_ports/mem.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_lookahead.h" +#include "vp9/encoder/vp9_mcomp.h" +#include "vp9/encoder/vp9_quantize.h" +#include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_tokenize.h" +#include "vp9/encoder/vp9_treewriter.h" +#include "vp9/encoder/vp9_variance.h" + +#ifdef __cplusplus +extern "C" { +#endif #define DISABLE_RC_LONG_TERM_MEM 0 // #define MODE_TEST_HIT_STATS @@ -68,7 +75,6 @@ typedef struct { // 0 = ZERO_MV, MV signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; - int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2]; FRAME_CONTEXT fc; } CODING_CONTEXT; @@ -95,18 +101,6 @@ typedef struct { } FIRSTPASS_STATS; typedef struct { - int frames_so_far; - double frame_intra_error; - double frame_coded_error; - double frame_pcnt_inter; - double frame_pcnt_motion; - double frame_mvr; - double frame_mvr_abs; - double frame_mvc; - double frame_mvc_abs; -} ONEPASS_FRAMESTATS; - -typedef struct { struct { int err; union { @@ -187,6 +181,12 @@ typedef enum { } TX_SIZE_SEARCH_METHOD; typedef enum { + NOT_IN_USE = 0, + RELAXED_NEIGHBORING_MIN_MAX = 1, + STRICT_NEIGHBORING_MIN_MAX = 2 +} AUTO_MIN_MAX_MODE; + +typedef enum { // Values should be powers of 2 so that they can be selected as bits of // an integer flags field @@ -253,6 +253,9 @@ typedef struct { // Maximum number of steps in logarithmic subpel search before giving up. int subpel_iters_per_step; + // Control when to stop subpel search + int subpel_force_stop; + // Thresh_mult is used to set a threshold for the rd score. A higher value // means that we will accept the best mode so far more often. This number // is used in combination with the current block size, and thresh_freq_fact @@ -340,9 +343,8 @@ typedef struct { BLOCK_SIZE always_this_block_size; // Sets min and max partition sizes for this 64x64 region based on the - // same superblock in last encoded frame, and the left and above neighbor - // in this block. - int auto_min_max_partition_size; + // same 64x64 in last encoded frame, and the left and above neighbor. + AUTO_MIN_MAX_MODE auto_min_max_partition_size; // Min and max partition size we enable (block_size) as per auto // min max, but also used by adjust partitioning, and pick_partitioning. @@ -376,7 +378,7 @@ typedef struct { // best for 8x8 mode. If set to 0 we always re check all the filters for // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. - int adaptive_pred_filter_type; + int adaptive_pred_interp_filter; // Implements various heuristics to skip searching modes // The heuristics selected are based on flags @@ -405,75 +407,19 @@ typedef struct { // final encode. int use_uv_intra_rd_estimate; - // This picks a loop filter strength by trying a small portion of the image - // with different values. + // This feature controls how the loop filter level is determined: + // 0: Try the full image with different values. + // 1: Try a small portion of the image with different values. + // 2: Estimate the level based on quantizer and frame type int use_fast_lpf_pick; // This feature limits the number of coefficients updates we actually do // by only looking at counts from 1/2 the bands. int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced -} SPEED_FEATURES; -typedef struct { - // Rate targetting variables - int this_frame_target; - int projected_frame_size; - int sb64_target_rate; - int last_q[3]; // Separate values for Intra/Inter/ARF-GF - int last_boosted_qindex; // Last boosted GF/KF/ARF q - - int gfu_boost; - int last_boost; - int kf_boost; - - double rate_correction_factor; - double key_frame_rate_correction_factor; - double gf_rate_correction_factor; - - unsigned int frames_since_golden; - unsigned int frames_till_gf_update_due; // Count down till next GF - unsigned int max_gf_interval; - unsigned int baseline_gf_interval; - unsigned int frames_to_key; - unsigned int frames_since_key; - unsigned int this_key_frame_forced; - unsigned int next_key_frame_forced; - unsigned int source_alt_ref_pending; - unsigned int source_alt_ref_active; - unsigned int is_src_frame_alt_ref; - - int per_frame_bandwidth; // Current section per frame bandwidth target - int av_per_frame_bandwidth; // Average frame size target for clip - int min_frame_bandwidth; // Minimum allocation used for any frame - int max_frame_bandwidth; // Maximum burst rate allowed for a frame. - - int ni_av_qi; - int ni_tot_qi; - int ni_frames; - int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF - double tot_q; - double avg_q; - - int buffer_level; - int bits_off_target; - - int decimation_factor; - int decimation_count; - - int rolling_target_bits; - int rolling_actual_bits; - - int long_rolling_target_bits; - int long_rolling_actual_bits; - - int64_t total_actual_bits; - int total_target_vs_actual; // debug stats - - int worst_quality; - int active_worst_quality; - int best_quality; - // int active_best_quality; -} RATE_CONTROL; + // This flag control the use of the new super fast rtc mode + int super_fast_rtc; +} SPEED_FEATURES; typedef struct VP9_COMP { DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); @@ -496,7 +442,6 @@ typedef struct VP9_COMP { MACROBLOCK mb; VP9_COMMON common; VP9_CONFIG oxcf; - struct rdcost_block_args rdcost_stack; struct lookahead_ctx *lookahead; struct lookahead_entry *source; #if CONFIG_MULTIPLE_ARF @@ -601,11 +546,6 @@ typedef struct VP9_COMP { int64_t target_bandwidth; struct vpx_codec_pkt_list *output_pkt_list; -#if 0 - // Experimental code for lagged and one pass - ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS]; - int one_pass_frame_index; -#endif MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; int mbgraph_n_frames; // number of frames filled in the above int static_mb_pct; // % forced skip mbs by segmentation @@ -613,12 +553,11 @@ typedef struct VP9_COMP { // for real time encoding int speed; - int compressor_speed; int cpu_used; int pass; - vp9_prob last_skip_false_probs[3][MBSKIP_CONTEXTS]; + vp9_prob last_skip_false_probs[3][SKIP_CONTEXTS]; int last_skip_probs_q[3]; int ref_frame_flags; @@ -780,7 +719,8 @@ typedef struct VP9_COMP { PARTITION_CONTEXT left_seg_context[8]; } VP9_COMP; -static int get_ref_frame_idx(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { +static int get_ref_frame_idx(const VP9_COMP *cpi, + MV_REFERENCE_FRAME ref_frame) { if (ref_frame == LAST_FRAME) { return cpi->lst_fb_idx; } else if (ref_frame == GOLDEN_FRAME) { @@ -790,21 +730,11 @@ static int get_ref_frame_idx(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { } } -static int get_scale_ref_frame_idx(VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { - if (ref_frame == LAST_FRAME) { - return 0; - } else if (ref_frame == GOLDEN_FRAME) { - return 1; - } else { - return 2; - } -} - static YV12_BUFFER_CONFIG *get_ref_frame_buffer(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { VP9_COMMON *const cm = &cpi->common; - return &cm->yv12_fb[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]]; + return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, + ref_frame)]].buf; } void vp9_encode_frame(VP9_COMP *cpi); @@ -815,7 +745,8 @@ void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x); void vp9_set_speed_features(VP9_COMP *cpi); -int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); +int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, + const YV12_BUFFER_CONFIG *reference); void vp9_alloc_compressor_data(VP9_COMP *cpi); @@ -825,4 +756,16 @@ static int get_token_alloc(int mb_rows, int mb_cols) { return mb_rows * mb_cols * (48 * 16 + 4); } +static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, + MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { + xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME + : 0]; + xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME + : 0]; +} + +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_ONYX_INT_H_ diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index a4ceabdf1..0c0a20f90 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -14,6 +14,7 @@ #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_quantize.h" +#include "vp9/common/vp9_quant_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/vpx_scale.h" #include "vp9/common/vp9_alloccommon.h" @@ -33,40 +34,53 @@ static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) { void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) { } -void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) { +static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, + MACROBLOCKD *const xd, VP9_COMMON *const cm, + int filt_level, int partial) { + int filt_err; + + vp9_set_alt_lf_level(cpi, filt_level); + vp9_loop_filter_frame(cm, xd, filt_level, 1, partial); + + filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); + + // Re-instate the unfiltered frame + vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); + + return filt_err; +} + +static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, + int partial) { MACROBLOCKD *const xd = &cpi->mb.e_mbd; VP9_COMMON *const cm = &cpi->common; struct loopfilter *const lf = &cm->lf; const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); - int best_err = 0; - int filt_err = 0; + int best_err; int filt_best; int filt_direction = 0; // Start the search at the previous frame filter level unless it is now out of // range. int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); int filter_step = filt_mid < 16 ? 4 : filt_mid / 4; + // Sum squared error at each filter level + int ss_err[MAX_LOOP_FILTER + 1]; - lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 - : cpi->oxcf.sharpness; + // Set each entry to -1 + vpx_memset(ss_err, 0xFF, sizeof(ss_err)); // Make a copy of the unfiltered / processed recon buffer vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf); - // Get baseline error score - vp9_set_alt_lf_level(cpi, filt_mid); - vp9_loop_filter_frame(cm, xd, filt_mid, 1, partial); - - best_err = vp9_calc_ss_err(sd, cm->frame_to_show); + best_err = try_filter_frame(sd, cpi, xd, cm, filt_mid, partial); filt_best = filt_mid; - - // Re-instate the unfiltered frame - vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); + ss_err[filt_mid] = best_err; while (filter_step > 0) { const int filt_high = MIN(filt_mid + filter_step, max_filter_level); const int filt_low = MAX(filt_mid - filter_step, min_filter_level); + int filt_err; // Bias against raising loop filter in favor of lowering it. int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; @@ -80,14 +94,12 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) { if (filt_direction <= 0 && filt_low != filt_mid) { // Get Low filter error score - vp9_set_alt_lf_level(cpi, filt_low); - vp9_loop_filter_frame(cm, xd, filt_low, 1, partial); - - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); - - // Re-instate the unfiltered frame - vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); - + if (ss_err[filt_low] < 0) { + filt_err = try_filter_frame(sd, cpi, xd, cm, filt_low, partial); + ss_err[filt_low] = filt_err; + } else { + filt_err = ss_err[filt_low]; + } // If value is close to the best so far then bias towards a lower loop // filter value. if ((filt_err - bias) < best_err) { @@ -101,14 +113,12 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) { // Now look at filt_high if (filt_direction >= 0 && filt_high != filt_mid) { - vp9_set_alt_lf_level(cpi, filt_high); - vp9_loop_filter_frame(cm, xd, filt_high, 1, partial); - - filt_err = vp9_calc_ss_err(sd, cm->frame_to_show); - - // Re-instate the unfiltered frame - vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); - + if (ss_err[filt_high] < 0) { + filt_err = try_filter_frame(sd, cpi, xd, cm, filt_high, partial); + ss_err[filt_high] = filt_err; + } else { + filt_err = ss_err[filt_high]; + } // Was it better than the previous best? if (filt_err < (best_err - bias)) { best_err = filt_err; @@ -128,3 +138,27 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) { lf->filter_level = filt_best; } + +void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, + int method) { + VP9_COMMON *const cm = &cpi->common; + struct loopfilter *const lf = &cm->lf; + + lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 + : cpi->oxcf.sharpness; + + if (method == 2) { + const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); + const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); + const int q = vp9_ac_quant(cm->base_qindex, 0); + // These values were determined by linear fitting the result of the + // searched level + // filt_guess = q * 0.316206 + 3.87252 + int filt_guess = (q * 20723 + 1015158 + (1 << 17)) >> 18; + if (cm->frame_type == KEY_FRAME) + filt_guess -= 4; + lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level); + } else { + search_filter_level(sd, cpi, method == 1); + } +} diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h index 9de4cf849..0fc1f88b3 100644 --- a/vp9/encoder/vp9_picklpf.h +++ b/vp9/encoder/vp9_picklpf.h @@ -12,11 +12,19 @@ #ifndef VP9_ENCODER_VP9_PICKLPF_H_ #define VP9_ENCODER_VP9_PICKLPF_H_ +#ifdef __cplusplus +extern "C" { +#endif + struct yv12_buffer_config; struct VP9_COMP; void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val); -void vp9_pick_filter_level(struct yv12_buffer_config *sd, - struct VP9_COMP *cpi, int partial); +void vp9_pick_filter_level(const struct yv12_buffer_config *sd, + struct VP9_COMP *cpi, int method); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_PICKLPF_H_ diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index f317f2a0d..bd28ea51e 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -60,8 +60,8 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int buf_offset; int stride = xd->plane[0].pre[0].stride; - YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); - + const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, + ref); if (scaled_ref_frame) { int i; // Swap out the reference frame for a version that's been scaled to @@ -80,7 +80,7 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, step_param = 6; further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) { + for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) { if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { tmp_mv->as_int = INVALID_MV; @@ -124,8 +124,8 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, stride, 0x7fffffff); // scale to 1/8 pixel resolution - tmp_mv->as_mv.row = tmp_mv->as_mv.row << 3; - tmp_mv->as_mv.col = tmp_mv->as_mv.col << 3; + tmp_mv->as_mv.row = tmp_mv->as_mv.row * 8; + tmp_mv->as_mv.col = tmp_mv->as_mv.col * 8; // calculate the bit cost on motion vector *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv, @@ -142,8 +142,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int *returnrate, int64_t *returndistortion, - BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx) { + BLOCK_SIZE bsize) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); @@ -155,6 +154,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, VP9_ALT_FLAG }; int64_t best_rd = INT64_MAX; int64_t this_rd; + int64_t cost[4]= { 0, 100, 150, 205 }; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -171,7 +171,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = MIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, @@ -182,7 +182,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][ref_frame].as_int = 0; } - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { int rate_mv = 0; if (!(cpi->ref_frame_flags & flag_list[ref_frame])) @@ -191,29 +191,42 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Select prediction reference frames. xd->plane[0].pre[0] = yv12_mb[ref_frame][0]; - - x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] = - full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], &rate_mv); - - if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) - continue; - clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd); clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd); for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { - int rate = x->inter_mode_cost[mbmi->mode_context[ref_frame]] - [INTER_OFFSET(this_mode)]; - int64_t dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] * - x->mode_sad[ref_frame][INTER_OFFSET(this_mode)]; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); + int rate = cost[this_mode - NEARESTMV]; + int64_t dist; + + if (this_mode == NEWMV) { + if (this_rd < 300) + continue; + + x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] = + full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], &rate_mv); + + if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) + continue; + } + + dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)]; + this_rd = rate + dist; if (this_rd < best_rd) { best_rd = this_rd; mbmi->mode = this_mode; mbmi->ref_frame[0] = ref_frame; mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + mbmi->interp_filter = EIGHTTAP; + + mbmi->ref_frame[1] = INTRA_FRAME; + mbmi->tx_size = max_txsize_lookup[bsize]; + mbmi->uv_mode = this_mode; + mbmi->skip_coeff = 0; + mbmi->sb_type = bsize; + mbmi->segment_id = 0; } } } @@ -223,8 +236,5 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // TODO(jingning) intra prediction search, if the best SAD is above a certain // threshold. - // store mode decisions - ctx->mic = *xd->mi_8x8[0]; - return INT64_MAX; } diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h index 32750fa69..05ff18762 100644 --- a/vp9/encoder/vp9_pickmode.h +++ b/vp9/encoder/vp9_pickmode.h @@ -8,12 +8,24 @@ * be found in the AUTHORS file in the root of the source tree. */ +#ifndef VP9_ENCODER_VP9_PICKMODE_H_ +#define VP9_ENCODER_VP9_PICKMODE_H_ + #include "vp9/encoder/vp9_onyx_int.h" +#ifdef __cplusplus +extern "C" { +#endif + int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const struct TileInfo *const tile, int mi_row, int mi_col, int *returnrate, int64_t *returndistortion, - BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx); + BLOCK_SIZE bsize); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_ENCODER_VP9_PICKMODE_H_ diff --git a/vp9/encoder/vp9_psnr.h b/vp9/encoder/vp9_psnr.h index 15dd8366b..ffe00ed2c 100644 --- a/vp9/encoder/vp9_psnr.h +++ b/vp9/encoder/vp9_psnr.h @@ -12,6 +12,14 @@ #ifndef VP9_ENCODER_VP9_PSNR_H_ #define VP9_ENCODER_VP9_PSNR_H_ +#ifdef __cplusplus +extern "C" { +#endif + double vp9_mse2psnr(double samples, double peak, double mse); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_PSNR_H_ diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index 41cfa5283..680cf4aec 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -13,6 +13,10 @@ #include "vp9/encoder/vp9_block.h" +#ifdef __cplusplus +extern "C" { +#endif + void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan); @@ -28,4 +32,8 @@ void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x); void vp9_init_quantizer(struct VP9_COMP *cpi); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_QUANTIZE_H_ diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 3ebf98c0f..74eb98fb0 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -218,7 +218,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) { vp9_clear_system_state(); // __asm emms; // For 1-pass. - if (cpi->pass == 0) { + if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { if (cpi->common.current_video_frame == 0) { target = oxcf->starting_buffer_level / 2; } else { @@ -246,7 +246,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) { if (oxcf->rc_max_intra_bitrate_pct) { const int max_rate = rc->per_frame_bandwidth * - oxcf->rc_max_intra_bitrate_pct / 100; + oxcf->rc_max_intra_bitrate_pct / 100; target = MIN(target, max_rate); } rc->this_frame_target = target; @@ -375,27 +375,22 @@ static int target_size_from_buffer_level(const VP9_CONFIG *oxcf, static void calc_pframe_target_size(VP9_COMP *const cpi) { RATE_CONTROL *const rc = &cpi->rc; const VP9_CONFIG *const oxcf = &cpi->oxcf; - int min_frame_target = MAX(rc->min_frame_bandwidth, - rc->av_per_frame_bandwidth >> 5); - if (cpi->refresh_alt_ref_frame) { - // Special alt reference frame case - // Per frame bit target for the alt ref frame - rc->per_frame_bandwidth = cpi->twopass.gf_bits; - rc->this_frame_target = rc->per_frame_bandwidth; - } else { - // Normal frames (gf and inter). - rc->this_frame_target = rc->per_frame_bandwidth; - // Set target frame size based on buffer level, for 1 pass CBR. - if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { - // Need to decide how low min_frame_target should be for 1-pass CBR. - // For now, use: cpi->rc.av_per_frame_bandwidth / 16: - min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, - FRAME_OVERHEAD_BITS); - rc->this_frame_target = target_size_from_buffer_level(oxcf, rc); - // Adjust qp-max based on buffer level. - rc->active_worst_quality = - adjust_active_worst_quality_from_buffer_level(oxcf, rc); - } + int min_frame_target; + rc->this_frame_target = rc->per_frame_bandwidth; + + if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + // Need to decide how low min_frame_target should be for 1-pass CBR. + // For now, use: cpi->rc.av_per_frame_bandwidth / 16: + min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, + FRAME_OVERHEAD_BITS); + rc->this_frame_target = target_size_from_buffer_level(oxcf, rc); + // Adjust qp-max based on buffer level. + rc->active_worst_quality = + adjust_active_worst_quality_from_buffer_level(oxcf, rc); + + if (rc->this_frame_target < min_frame_target) + rc->this_frame_target = min_frame_target; + return; } // Check that the total sum of adjustments is not above the maximum allowed. @@ -404,6 +399,9 @@ static void calc_pframe_target_size(VP9_COMP *const cpi) { // not capable of recovering all the extra bits we have spent in the KF or GF, // then the remainder will have to be recovered over a longer time span via // other buffer / rate control mechanisms. + min_frame_target = MAX(rc->min_frame_bandwidth, + rc->av_per_frame_bandwidth >> 5); + if (rc->this_frame_target < min_frame_target) rc->this_frame_target = min_frame_target; @@ -468,8 +466,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { // Work out a size correction factor. if (projected_size_based_on_q > 0) - correction_factor = - (100 * cpi->rc.projected_frame_size) / projected_size_based_on_q; + correction_factor = (100 * cpi->rc.projected_frame_size) / + projected_size_based_on_q; // More heavily damped adjustment used if we have been oscillating either side // of target. @@ -514,26 +512,25 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) { int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, int active_best_quality, int active_worst_quality) { + const VP9_COMMON *const cm = &cpi->common; int q = active_worst_quality; int last_error = INT_MAX; - int i, target_bits_per_mb, bits_per_mb_at_this_q; + int i, target_bits_per_mb; const double correction_factor = get_rate_correction_factor(cpi); // Calculate required scaling factor based on target frame size and size of // frame produced using previous Q. if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) - target_bits_per_mb = - (target_bits_per_frame / cpi->common.MBs) - << BPER_MB_NORMBITS; // Case where we would overflow int + // Case where we would overflow int + target_bits_per_mb = (target_bits_per_frame / cm->MBs) << BPER_MB_NORMBITS; else - target_bits_per_mb = - (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs; + target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs; i = active_best_quality; do { - bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cpi->common.frame_type, i, - correction_factor); + const int bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cm->frame_type, i, + correction_factor); if (bits_per_mb_at_this_q <= target_bits_per_mb) { if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error) @@ -550,25 +547,19 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, return q; } -static int get_active_quality(int q, - int gfu_boost, - int low, - int high, - int *low_motion_minq, - int *high_motion_minq) { - int active_best_quality; +static int get_active_quality(int q, int gfu_boost, int low, int high, + int *low_motion_minq, int *high_motion_minq) { if (gfu_boost > high) { - active_best_quality = low_motion_minq[q]; + return low_motion_minq[q]; } else if (gfu_boost < low) { - active_best_quality = high_motion_minq[q]; + return high_motion_minq[q]; } else { const int gap = high - low; const int offset = high - gfu_boost; const int qdiff = high_motion_minq[q] - low_motion_minq[q]; const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; - active_best_quality = low_motion_minq[q] + adjustment; + return low_motion_minq[q] + adjustment; } - return active_best_quality; } int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi, @@ -615,8 +606,8 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi, // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality); - active_best_quality += - vp9_compute_qdelta(cpi, q_val, (q_val * q_adj_factor)); + active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val * + q_adj_factor); } #else double current_q; @@ -720,15 +711,12 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi, #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY // Limit Q range for the adaptive loop. if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) { - if (!(cpi->pass == 0 && cm->current_video_frame == 0)) { - *top_index = - (active_worst_quality + active_best_quality * 3) / 4; - } + if (!(cpi->pass == 0 && cm->current_video_frame == 0)) + *top_index = (active_worst_quality + active_best_quality * 3) / 4; } else if (!rc->is_src_frame_alt_ref && (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - *top_index = - (active_worst_quality + active_best_quality) / 2; + *top_index = (active_worst_quality + active_best_quality) / 2; } #endif @@ -818,7 +806,8 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, // return of 0 means drop frame int vp9_rc_pick_frame_size_target(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; + const VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; if (cm->frame_type == KEY_FRAME) calc_iframe_target_size(cpi); @@ -826,12 +815,12 @@ int vp9_rc_pick_frame_size_target(VP9_COMP *cpi) { calc_pframe_target_size(cpi); // Clip the frame target to the maximum allowed value. - if (cpi->rc.this_frame_target > cpi->rc.max_frame_bandwidth) - cpi->rc.this_frame_target = cpi->rc.max_frame_bandwidth; + if (rc->this_frame_target > rc->max_frame_bandwidth) + rc->this_frame_target = rc->max_frame_bandwidth; // Target rate per SB64 (including partial SB64s. - cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) / - (cpi->common.width * cpi->common.height); + rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) / + (cm->width * cm->height); return 1; } diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 086755af8..eba4b7a92 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -12,61 +12,130 @@ #ifndef VP9_ENCODER_VP9_RATECTRL_H_ #define VP9_ENCODER_VP9_RATECTRL_H_ -#include "vp9/encoder/vp9_onyx_int.h" +#ifdef __cplusplus +extern "C" { +#endif #define FRAME_OVERHEAD_BITS 200 -void vp9_save_coding_context(VP9_COMP *cpi); -void vp9_restore_coding_context(VP9_COMP *cpi); - -void vp9_setup_key_frame(VP9_COMP *cpi); -void vp9_setup_inter_frame(VP9_COMP *cpi); +typedef struct { + // Rate targetting variables + int this_frame_target; + int projected_frame_size; + int sb64_target_rate; + int last_q[3]; // Separate values for Intra/Inter/ARF-GF + int last_boosted_qindex; // Last boosted GF/KF/ARF q + + int gfu_boost; + int last_boost; + int kf_boost; + + double rate_correction_factor; + double key_frame_rate_correction_factor; + double gf_rate_correction_factor; + + unsigned int frames_since_golden; + unsigned int frames_till_gf_update_due; // Count down till next GF + unsigned int max_gf_interval; + unsigned int baseline_gf_interval; + unsigned int frames_to_key; + unsigned int frames_since_key; + unsigned int this_key_frame_forced; + unsigned int next_key_frame_forced; + unsigned int source_alt_ref_pending; + unsigned int source_alt_ref_active; + unsigned int is_src_frame_alt_ref; + + int per_frame_bandwidth; // Current section per frame bandwidth target + int av_per_frame_bandwidth; // Average frame size target for clip + int min_frame_bandwidth; // Minimum allocation used for any frame + int max_frame_bandwidth; // Maximum burst rate allowed for a frame. + + int ni_av_qi; + int ni_tot_qi; + int ni_frames; + int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF + double tot_q; + double avg_q; + + int buffer_level; + int bits_off_target; + + int decimation_factor; + int decimation_count; + + int rolling_target_bits; + int rolling_actual_bits; + + int long_rolling_target_bits; + int long_rolling_actual_bits; + + int64_t total_actual_bits; + int total_target_vs_actual; // debug stats + + int worst_quality; + int active_worst_quality; + int best_quality; + // int active_best_quality; +} RATE_CONTROL; + +struct VP9_COMP; + +void vp9_save_coding_context(struct VP9_COMP *cpi); +void vp9_restore_coding_context(struct VP9_COMP *cpi); + +void vp9_setup_key_frame(struct VP9_COMP *cpi); +void vp9_setup_inter_frame(struct VP9_COMP *cpi); double vp9_convert_qindex_to_q(int qindex); // Updates rate correction factors -void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var); +void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi, int damp_var); // initialize luts for minq void vp9_rc_init_minq_luts(void); // return of 0 means drop frame // Changes only rc.this_frame_target and rc.sb64_rate_target -int vp9_rc_pick_frame_size_target(VP9_COMP *cpi); +int vp9_rc_pick_frame_size_target(struct VP9_COMP *cpi); -void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, +void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi, int this_frame_target, int *frame_under_shoot_limit, int *frame_over_shoot_limit); // Picks q and q bounds given the target for bits -int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi, +int vp9_rc_pick_q_and_adjust_q_bounds(const struct VP9_COMP *cpi, int *bottom_index, int *top_index); // Estimates q to achieve a target bits per frame -int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, +int vp9_rc_regulate_q(const struct VP9_COMP *cpi, int target_bits_per_frame, int active_best_quality, int active_worst_quality); // Post encode update of the rate control parameters based // on bytes used -void vp9_rc_postencode_update(VP9_COMP *cpi, +void vp9_rc_postencode_update(struct VP9_COMP *cpi, uint64_t bytes_used); // for dropped frames -void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi); +void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi); // estimates bits per mb for a given qindex and correction factor int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, double correction_factor); // Post encode update of the rate control parameters for 2-pass -void vp9_twopass_postencode_update(VP9_COMP *cpi, +void vp9_twopass_postencode_update(struct VP9_COMP *cpi, uint64_t bytes_used); // Decide if we should drop this frame: For 1-pass CBR. -int vp9_drop_frame(VP9_COMP *cpi); +int vp9_drop_frame(struct VP9_COMP *cpi); // Update the buffer level. -void vp9_update_buffer_level(VP9_COMP *cpi, int encoded_frame_size); +void vp9_update_buffer_level(struct VP9_COMP *cpi, int encoded_frame_size); + +#ifdef __cplusplus +} // extern "C" +#endif #endif // VP9_ENCODER_VP9_RATECTRL_H_ diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 242aa8710..9cca3bd52 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -55,6 +55,22 @@ typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION; +struct rdcost_block_args { + MACROBLOCK *x; + ENTROPY_CONTEXT t_above[16]; + ENTROPY_CONTEXT t_left[16]; + int rate; + int64_t dist; + int64_t sse; + int this_rate; + int64_t this_dist; + int64_t this_sse; + int64_t this_rd; + int64_t best_rd; + int skip; + const int16_t *scan, *nb; +}; + const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {NEARESTMV, {LAST_FRAME, NONE}}, {NEARESTMV, {ALTREF_FRAME, NONE}}, @@ -280,22 +296,24 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { fill_token_costs(x->token_costs, cm->fc.coef_probs); - for (i = 0; i < PARTITION_CONTEXTS; i++) - vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i), - vp9_partition_tree); + if (!cpi->sf.super_fast_rtc) { + for (i = 0; i < PARTITION_CONTEXTS; i++) + vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i), + vp9_partition_tree); - fill_mode_costs(cpi); + fill_mode_costs(cpi); - if (!frame_is_intra_only(cm)) { - vp9_build_nmv_cost_table(x->nmvjointcost, - cm->allow_high_precision_mv ? x->nmvcost_hp - : x->nmvcost, - &cm->fc.nmvc, - cm->allow_high_precision_mv, 1, 1); + if (!frame_is_intra_only(cm)) { + vp9_build_nmv_cost_table(x->nmvjointcost, + cm->allow_high_precision_mv ? x->nmvcost_hp + : x->nmvcost, + &cm->fc.nmvc, + cm->allow_high_precision_mv, 1, 1); - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - vp9_cost_tokens((int *)x->inter_mode_cost[i], - cm->fc.inter_mode_probs[i], vp9_inter_mode_tree); + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + vp9_cost_tokens((int *)x->inter_mode_cost[i], + cm->fc.inter_mode_probs[i], vp9_inter_mode_tree); + } } } @@ -419,16 +437,26 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); + (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); + if (i == 0) x->pred_sse[ref] = sse; - - dist_sum += (int)sse; + if (cpi->sf.super_fast_rtc) { + dist_sum += (int)sse; + } else { + int rate; + int64_t dist; + model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], + pd->dequant[1] >> 3, &rate, &dist); + rate_sum += rate; + dist_sum += (int)dist; + } } *out_rate_sum = rate_sum; - *out_dist_sum = dist_sum << 4; + *out_dist_sum = (int64_t)dist_sum << 4; } static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, @@ -575,15 +603,15 @@ static INLINE int cost_coeffs(MACROBLOCK *x, return cost; } -static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) { +static void dist_block(int plane, int block, TX_SIZE tx_size, + struct rdcost_block_args* args) { const int ss_txfrm_size = tx_size << 1; - struct rdcost_block_args* args = arg; MACROBLOCK* const x = args->x; MACROBLOCKD* const xd = &x->e_mbd; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; int64_t this_sse; - int shift = args->tx_size == TX_32X32 ? 0 : 2; + int shift = tx_size == TX_32X32 ? 0 : 2; int16_t *const coeff = BLOCK_OFFSET(p->coeff, block); int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, @@ -600,14 +628,12 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) { } static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - struct rdcost_block_args* args = arg; - + TX_SIZE tx_size, struct rdcost_block_args* args) { int x_idx, y_idx; - txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx); + txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx); args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx, - args->t_left + y_idx, args->tx_size, + args->t_left + y_idx, tx_size, args->scan, args->nb); } @@ -684,24 +710,19 @@ void vp9_get_entropy_contexts(TX_SIZE tx_size, } } -static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size, - const int num_4x4_w, const int num_4x4_h, - const int64_t ref_rdcost, +static void init_rdcost_stack(MACROBLOCK *x, const int64_t ref_rdcost, struct rdcost_block_args *arg) { vpx_memset(arg, 0, sizeof(struct rdcost_block_args)); arg->x = x; - arg->tx_size = tx_size; - arg->bw = num_4x4_w; - arg->bh = num_4x4_h; arg->best_rd = ref_rdcost; } static void txfm_rd_in_plane(MACROBLOCK *x, - struct rdcost_block_args *rd_stack, int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, BLOCK_SIZE bsize, TX_SIZE tx_size) { + struct rdcost_block_args rd_stack; MACROBLOCKD *const xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[plane]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); @@ -709,30 +730,29 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const int num_4x4_h = num_4x4_blocks_high_lookup[bs]; const scan_order *so; - init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h, - ref_best_rd, rd_stack); + init_rdcost_stack(x, ref_best_rd, &rd_stack); if (plane == 0) xd->mi_8x8[0]->mbmi.tx_size = tx_size; - vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left, + vp9_get_entropy_contexts(tx_size, rd_stack.t_above, rd_stack.t_left, pd->above_context, pd->left_context, num_4x4_w, num_4x4_h); so = get_scan(xd, tx_size, pd->plane_type, 0); - rd_stack->scan = so->scan; - rd_stack->nb = so->neighbors; + rd_stack.scan = so->scan; + rd_stack.nb = so->neighbors; foreach_transformed_block_in_plane(xd, bsize, plane, - block_rd_txfm, rd_stack); - if (rd_stack->skip) { + block_rd_txfm, &rd_stack); + if (rd_stack.skip) { *rate = INT_MAX; *distortion = INT64_MAX; *sse = INT64_MAX; *skippable = 0; } else { - *distortion = rd_stack->this_dist; - *rate = rd_stack->this_rate; - *sse = rd_stack->this_sse; + *distortion = rd_stack.this_dist; + *rate = rd_stack.this_rate; + *sse = rd_stack.this_sse; *skippable = vp9_is_skippable_in_plane(x, bsize, plane); } } @@ -750,7 +770,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = MIN(max_tx_size, largest_tx_size); - txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip, + txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size); cpi->tx_stepdown_count[0]++; @@ -881,7 +901,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, // Actually encode using the chosen mode if a model was used, but do not // update the r, d costs - txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip, + txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size); if (max_tx_size == TX_32X32 && best_tx == TX_32X32) { @@ -904,7 +924,6 @@ static void super_block_yrd(VP9_COMP *cpi, int64_t d[TX_SIZES], sse[TX_SIZES]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack; const int b_inter_mode = is_inter_block(mbmi); const TX_SIZE max_tx_size = max_txsize_lookup[bs]; TX_SIZE tx_size; @@ -934,7 +953,7 @@ static void super_block_yrd(VP9_COMP *cpi, skip, sse, ref_best_rd, bs); } else { for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) - txfm_rd_in_plane(x, rdcost_stack, &r[tx_size][0], &d[tx_size], + txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size], &sse[tx_size], ref_best_rd, 0, bs, tx_size); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, @@ -1263,7 +1282,7 @@ static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x, *skippable = 1; for (plane = 1; plane < MAX_MB_PLANE; ++plane) { - txfm_rd_in_plane(x, &cpi->rdcost_stack, &pnrate, &pndist, &pnskip, &pnsse, + txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd, plane, bsize, uv_txfm_size); if (pnrate == INT_MAX) goto term; @@ -1517,8 +1536,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, vp9_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, - width, height, ref, &xd->subpix, MV_PRECISION_Q3, + &xd->block_refs[ref]->sf, width, height, ref, + xd->interp_kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); } @@ -1749,7 +1768,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (best_rd < label_mv_thresh) break; - if (cpi->compressor_speed) { + if (cpi->oxcf.mode != MODE_SECONDPASS_BEST && + cpi->oxcf.mode != MODE_BESTQUALITY) { // use previous block's result as next block's MV predictor. if (i > 0) { bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int; @@ -1813,7 +1833,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, } // Should we do a full search (best quality only) - if (cpi->compressor_speed == 0) { + if (cpi->oxcf.mode == MODE_BESTQUALITY || + cpi->oxcf.mode == MODE_SECONDPASS_BEST) { /* Check if mvp_full is within the range. */ clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); @@ -1840,7 +1861,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, &bsi->ref_mv->as_mv, cm->allow_high_precision_mv, x->errorperbit, v_fn_ptr, - 0, cpi->sf.subpel_iters_per_step, + cpi->sf.subpel_force_stop, + cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &distortion, &x->pred_sse[mbmi->ref_frame[0]]); @@ -2304,13 +2326,12 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, ref_frame, block_size); } -YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) { - YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; - int fb = get_ref_frame_idx(cpi, ref_frame); - int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame); - if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb]) - scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]]; - return scaled_ref_frame; +const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, + int ref_frame) { + const VP9_COMMON *const cm = &cpi->common; + const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; + const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; + return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; } static INLINE int get_switchable_rate(const MACROBLOCK *x) { @@ -2342,7 +2363,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; - YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); + const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, + ref); int_mv pred_mv[3]; pred_mv[0] = mbmi->ref_mvs[ref][0]; @@ -2451,7 +2473,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - 0, cpi->sf.subpel_iters_per_step, + cpi->sf.subpel_force_stop, + cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref]); } @@ -2489,7 +2512,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, struct buf_2d backup_yv12[2][MAX_MB_PLANE]; struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0]; int last_besterr[2] = {INT_MAX, INT_MAX}; - YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { + const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]), vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1]) }; @@ -2536,7 +2559,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &frame_mv[refs[!id]].as_mv, &xd->block_refs[!id]->sf, pw, ph, 0, - &xd->subpix, MV_PRECISION_Q3, + xd->interp_kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); // Compound motion search on first ref frame. @@ -2626,7 +2649,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate_y, int64_t *distortion_y, int *rate_uv, int64_t *distortion_uv, int *mode_excluded, int *disable_skip, - INTERPOLATION_TYPE *best_filter, + INTERP_FILTER *best_filter, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], @@ -2769,7 +2792,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) cpi->rd_filter_cache[i] = INT64_MAX; - if (cm->mcomp_filter_type != BILINEAR) { + if (cm->interp_filter != BILINEAR) { *best_filter = EIGHTTAP; if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { @@ -2783,7 +2806,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int j; int64_t rs_rd; mbmi->interp_filter = i; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); rs = get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); @@ -2792,16 +2815,16 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_filter_cache[i] = rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) rd += rs_rd; cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); } else { int rate_sum = 0; int64_t dist_sum = 0; - if ((cm->mcomp_filter_type == SWITCHABLE && + if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) || - (cm->mcomp_filter_type != SWITCHABLE && - (cm->mcomp_filter_type == mbmi->interp_filter || + (cm->interp_filter != SWITCHABLE && + (cm->interp_filter == mbmi->interp_filter || (i == 0 && intpel_mv)))) { restore_dst_buf(xd, orig_dst, orig_dst_stride); } else { @@ -2817,7 +2840,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_filter_cache[i] = rd; cpi->rd_filter_cache[SWITCHABLE_FILTERS] = MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) rd += rs_rd; cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd); @@ -2838,13 +2861,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (newbest) { best_rd = rd; *best_filter = mbmi->interp_filter; - if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv) + if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) best_needs_copy = !best_needs_copy; } - if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || - (cm->mcomp_filter_type != SWITCHABLE && - cm->mcomp_filter_type == mbmi->interp_filter)) { + if ((cm->interp_filter == SWITCHABLE && newbest) || + (cm->interp_filter != SWITCHABLE && + cm->interp_filter == mbmi->interp_filter)) { pred_exists = 1; } } @@ -2852,10 +2875,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } // Set the appropriate filter - mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? - cm->mcomp_filter_type : *best_filter; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0; + mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? + cm->interp_filter : *best_filter; + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); + rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2884,7 +2907,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } } - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) *rate2 += get_switchable_rate(x); if (!is_comp_pred && cpi->enable_encode_breakout) { @@ -3129,7 +3152,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_inter_rd = INT64_MAX; MB_PREDICTION_MODE best_intra_mode = DC_PRED; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; - INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE; + INTERP_FILTER tmp_best_filter = SWITCHABLE; int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; @@ -3277,13 +3300,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; } - set_scale_factors(cm, xd, ref_frame - 1, second_ref_frame - 1); + set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); mbmi->uv_mode = DC_PRED; // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->mcomp_filter_type; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (comp_pred) { if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) @@ -3573,9 +3597,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best filter type */ if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && - cm->mcomp_filter_type != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ? - SWITCHABLE_FILTERS : cm->mcomp_filter_type]; + cm->interp_filter != BILINEAR) { + int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + SWITCHABLE_FILTERS : cm->interp_filter]; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { int64_t adj_rd; @@ -3649,8 +3673,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - assert((cm->mcomp_filter_type == SWITCHABLE) || - (cm->mcomp_filter_type == best_mbmode.interp_filter) || + assert((cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); // Updating rd_thresh_freq_fact[] here means that the different @@ -3692,7 +3716,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, else best_filter_diff[i] = best_rd - best_filter_rd[i]; } - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); } else { vp9_zero(best_filter_diff); @@ -3709,7 +3733,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_tx_diff); } - set_scale_factors(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1); + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, &mbmi->ref_mvs[mbmi->ref_frame[0]][0], &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : @@ -3754,7 +3778,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_prob comp_mode_p; int64_t best_inter_rd = INT64_MAX; MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME; - INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE; + INTERP_FILTER tmp_best_filter = SWITCHABLE; int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; @@ -3902,13 +3926,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) continue; - set_scale_factors(cm, xd, ref_frame - 1, second_ref_frame - 1); + set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); mbmi->uv_mode = DC_PRED; // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->mcomp_filter_type; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (comp_pred) { if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) @@ -4013,17 +4038,17 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) cpi->rd_filter_cache[i] = INT64_MAX; - if (cm->mcomp_filter_type != BILINEAR) { + if (cm->interp_filter != BILINEAR) { tmp_best_filter = EIGHTTAP; if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { tmp_best_filter = EIGHTTAP; - } else if (cpi->sf.adaptive_pred_filter_type == 1 && - ctx->pred_filter_type < SWITCHABLE) { - tmp_best_filter = ctx->pred_filter_type; - } else if (cpi->sf.adaptive_pred_filter_type == 2) { - tmp_best_filter = ctx->pred_filter_type < SWITCHABLE ? - ctx->pred_filter_type : 0; + } else if (cpi->sf.adaptive_pred_interp_filter == 1 && + ctx->pred_interp_filter < SWITCHABLE) { + tmp_best_filter = ctx->pred_interp_filter; + } else if (cpi->sf.adaptive_pred_interp_filter == 2) { + tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ? + ctx->pred_interp_filter : 0; } else { for (switchable_filter_index = 0; switchable_filter_index < SWITCHABLE_FILTERS; @@ -4031,8 +4056,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int newbest, rs; int64_t rs_rd; mbmi->interp_filter = switchable_filter_index; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile, &mbmi->ref_mvs[ref_frame][0], second_ref, @@ -4051,7 +4075,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->rd_filter_cache[SWITCHABLE_FILTERS] = MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd); @@ -4061,9 +4085,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, tmp_best_filter = mbmi->interp_filter; tmp_best_rd = tmp_rd; } - if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || - (mbmi->interp_filter == cm->mcomp_filter_type && - cm->mcomp_filter_type != SWITCHABLE)) { + if ((newbest && cm->interp_filter == SWITCHABLE) || + (mbmi->interp_filter == cm->interp_filter && + cm->interp_filter != SWITCHABLE)) { tmp_best_rdu = tmp_rd; tmp_best_rate = rate; tmp_best_ratey = rate_y; @@ -4095,9 +4119,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_best_rdu == INT64_MAX && pred_exists) continue; - mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? - tmp_best_filter : cm->mcomp_filter_type); - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? + tmp_best_filter : cm->interp_filter); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level @@ -4113,7 +4137,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; } else { - if (cm->mcomp_filter_type == SWITCHABLE) { + if (cm->interp_filter == SWITCHABLE) { int rs = get_switchable_rate(x); tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); } @@ -4131,7 +4155,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, rate2 += rate; distortion2 += distortion; - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) rate2 += get_switchable_rate(x); if (!mode_excluded) @@ -4299,9 +4323,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, /* keep record of best filter type */ if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && - cm->mcomp_filter_type != BILINEAR) { - int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ? - SWITCHABLE_FILTERS : cm->mcomp_filter_type]; + cm->interp_filter != BILINEAR) { + int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ? + SWITCHABLE_FILTERS : cm->interp_filter]; int64_t adj_rd; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { if (ref == INT64_MAX) @@ -4372,8 +4396,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } - assert((cm->mcomp_filter_type == SWITCHABLE) || - (cm->mcomp_filter_type == best_mbmode.interp_filter) || + assert((cm->interp_filter == SWITCHABLE) || + (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); // Updating rd_thresh_freq_fact[] here means that the different @@ -4425,7 +4449,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, else best_filter_diff[i] = best_rd - best_filter_rd[i]; } - if (cm->mcomp_filter_type == SWITCHABLE) + if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); } else { vp9_zero(best_filter_diff); @@ -4442,7 +4466,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_tx_diff); } - set_scale_factors(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1); + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, &mbmi->ref_mvs[mbmi->ref_frame[0]][0], &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 9ac1f5404..96cea4216 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -13,6 +13,10 @@ #include "vp9/encoder/vp9_onyx_int.h" +#ifdef __cplusplus +extern "C" { +#endif + #define RDDIV_BITS 7 #define RDCOST(RM, DM, R, D) \ @@ -46,7 +50,8 @@ void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int_mv frame_near_mv[MAX_REF_FRAMES], struct buf_2d yv12_mb[4][MAX_MB_PLANE]); -YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame); +const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, + int ref_frame); void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *r, int64_t *d, BLOCK_SIZE bsize, @@ -80,4 +85,8 @@ void vp9_get_entropy_contexts(TX_SIZE tx_size, const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left, int num_4x4_w, int num_4x4_h); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_RDOPT_H_ diff --git a/vp9/encoder/vp9_resize.c b/vp9/encoder/vp9_resize.c index f15abc07d..0766b5107 100644 --- a/vp9/encoder/vp9_resize.c +++ b/vp9/encoder/vp9_resize.c @@ -16,7 +16,6 @@ #include <string.h> #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_resize.h" -#include "vpx/vpx_integer.h" #define FILTER_BITS 7 @@ -30,8 +29,44 @@ typedef int16_t interp_kernel[INTERP_TAPS]; -// Filters for interpolation - note this also filters integer pels. -const interp_kernel vp9_filteredinterp_filters[(1 << SUBPEL_BITS)] = { +// Filters for interpolation (0.5-band) - note this also filters integer pels. +const interp_kernel vp9_filteredinterp_filters500[(1 << SUBPEL_BITS)] = { + {-3, 0, 35, 64, 35, 0, -3, 0}, + {-3, -1, 34, 64, 36, 1, -3, 0}, + {-3, -1, 32, 64, 38, 1, -3, 0}, + {-2, -2, 31, 63, 39, 2, -3, 0}, + {-2, -2, 29, 63, 41, 2, -3, 0}, + {-2, -2, 28, 63, 42, 3, -4, 0}, + {-2, -3, 27, 63, 43, 4, -4, 0}, + {-2, -3, 25, 62, 45, 5, -4, 0}, + {-2, -3, 24, 62, 46, 5, -4, 0}, + {-2, -3, 23, 61, 47, 6, -4, 0}, + {-2, -3, 21, 60, 49, 7, -4, 0}, + {-1, -4, 20, 60, 50, 8, -4, -1}, + {-1, -4, 19, 59, 51, 9, -4, -1}, + {-1, -4, 17, 58, 52, 10, -4, 0}, + {-1, -4, 16, 57, 53, 12, -4, -1}, + {-1, -4, 15, 56, 54, 13, -4, -1}, + {-1, -4, 14, 55, 55, 14, -4, -1}, + {-1, -4, 13, 54, 56, 15, -4, -1}, + {-1, -4, 12, 53, 57, 16, -4, -1}, + {0, -4, 10, 52, 58, 17, -4, -1}, + {-1, -4, 9, 51, 59, 19, -4, -1}, + {-1, -4, 8, 50, 60, 20, -4, -1}, + {0, -4, 7, 49, 60, 21, -3, -2}, + {0, -4, 6, 47, 61, 23, -3, -2}, + {0, -4, 5, 46, 62, 24, -3, -2}, + {0, -4, 5, 45, 62, 25, -3, -2}, + {0, -4, 4, 43, 63, 27, -3, -2}, + {0, -4, 3, 42, 63, 28, -2, -2}, + {0, -3, 2, 41, 63, 29, -2, -2}, + {0, -3, 2, 39, 63, 31, -2, -2}, + {0, -3, 1, 38, 64, 32, -1, -3}, + {0, -3, 1, 36, 64, 34, -1, -3} +}; + +// Filters for interpolation (0.625-band) - note this also filters integer pels. +const interp_kernel vp9_filteredinterp_filters625[(1 << SUBPEL_BITS)] = { {-1, -8, 33, 80, 33, -8, -1, 0}, {-1, -8, 30, 80, 35, -8, -1, 1}, {-1, -8, 28, 80, 37, -7, -2, 1}, @@ -66,10 +101,132 @@ const interp_kernel vp9_filteredinterp_filters[(1 << SUBPEL_BITS)] = { {1, -1, -8, 35, 80, 30, -8, -1}, }; +// Filters for interpolation (0.75-band) - note this also filters integer pels. +const interp_kernel vp9_filteredinterp_filters750[(1 << SUBPEL_BITS)] = { + {2, -11, 25, 96, 25, -11, 2, 0}, + {2, -11, 22, 96, 28, -11, 2, 0}, + {2, -10, 19, 95, 31, -11, 2, 0}, + {2, -10, 17, 95, 34, -12, 2, 0}, + {2, -9, 14, 94, 37, -12, 2, 0}, + {2, -8, 12, 93, 40, -12, 1, 0}, + {2, -8, 9, 92, 43, -12, 1, 1}, + {2, -7, 7, 91, 46, -12, 1, 0}, + {2, -7, 5, 90, 49, -12, 1, 0}, + {2, -6, 3, 88, 52, -12, 0, 1}, + {2, -5, 1, 86, 55, -12, 0, 1}, + {2, -5, -1, 84, 58, -11, 0, 1}, + {2, -4, -2, 82, 61, -11, -1, 1}, + {2, -4, -4, 80, 64, -10, -1, 1}, + {1, -3, -5, 77, 67, -9, -1, 1}, + {1, -3, -6, 75, 70, -8, -2, 1}, + {1, -2, -7, 72, 72, -7, -2, 1}, + {1, -2, -8, 70, 75, -6, -3, 1}, + {1, -1, -9, 67, 77, -5, -3, 1}, + {1, -1, -10, 64, 80, -4, -4, 2}, + {1, -1, -11, 61, 82, -2, -4, 2}, + {1, 0, -11, 58, 84, -1, -5, 2}, + {1, 0, -12, 55, 86, 1, -5, 2}, + {1, 0, -12, 52, 88, 3, -6, 2}, + {0, 1, -12, 49, 90, 5, -7, 2}, + {0, 1, -12, 46, 91, 7, -7, 2}, + {1, 1, -12, 43, 92, 9, -8, 2}, + {0, 1, -12, 40, 93, 12, -8, 2}, + {0, 2, -12, 37, 94, 14, -9, 2}, + {0, 2, -12, 34, 95, 17, -10, 2}, + {0, 2, -11, 31, 95, 19, -10, 2}, + {0, 2, -11, 28, 96, 22, -11, 2} +}; + +// Filters for interpolation (0.875-band) - note this also filters integer pels. +const interp_kernel vp9_filteredinterp_filters875[(1 << SUBPEL_BITS)] = { + {3, -8, 13, 112, 13, -8, 3, 0}, + {3, -7, 10, 112, 17, -9, 3, -1}, + {2, -6, 7, 111, 21, -9, 3, -1}, + {2, -5, 4, 111, 24, -10, 3, -1}, + {2, -4, 1, 110, 28, -11, 3, -1}, + {1, -3, -1, 108, 32, -12, 4, -1}, + {1, -2, -3, 106, 36, -13, 4, -1}, + {1, -1, -6, 105, 40, -14, 4, -1}, + {1, -1, -7, 102, 44, -14, 4, -1}, + {1, 0, -9, 100, 48, -15, 4, -1}, + {1, 1, -11, 97, 53, -16, 4, -1}, + {0, 1, -12, 95, 57, -16, 4, -1}, + {0, 2, -13, 91, 61, -16, 4, -1}, + {0, 2, -14, 88, 65, -16, 4, -1}, + {0, 3, -15, 84, 69, -17, 4, 0}, + {0, 3, -16, 81, 73, -16, 3, 0}, + {0, 3, -16, 77, 77, -16, 3, 0}, + {0, 3, -16, 73, 81, -16, 3, 0}, + {0, 4, -17, 69, 84, -15, 3, 0}, + {-1, 4, -16, 65, 88, -14, 2, 0}, + {-1, 4, -16, 61, 91, -13, 2, 0}, + {-1, 4, -16, 57, 95, -12, 1, 0}, + {-1, 4, -16, 53, 97, -11, 1, 1}, + {-1, 4, -15, 48, 100, -9, 0, 1}, + {-1, 4, -14, 44, 102, -7, -1, 1}, + {-1, 4, -14, 40, 105, -6, -1, 1}, + {-1, 4, -13, 36, 106, -3, -2, 1}, + {-1, 4, -12, 32, 108, -1, -3, 1}, + {-1, 3, -11, 28, 110, 1, -4, 2}, + {-1, 3, -10, 24, 111, 4, -5, 2}, + {-1, 3, -9, 21, 111, 7, -6, 2}, + {-1, 3, -9, 17, 112, 10, -7, 3} +}; + +// Filters for interpolation (full-band) - no filtering for integer pixels +const interp_kernel vp9_filteredinterp_filters1000[(1 << SUBPEL_BITS)] = { + {0, 0, 0, 128, 0, 0, 0, 0}, + {0, 1, -3, 128, 3, -1, 0, 0}, + {-1, 2, -6, 127, 7, -2, 1, 0}, + {-1, 3, -9, 126, 12, -4, 1, 0}, + {-1, 4, -12, 125, 16, -5, 1, 0}, + {-1, 4, -14, 123, 20, -6, 2, 0}, + {-1, 5, -15, 120, 25, -8, 2, 0}, + {-1, 5, -17, 118, 30, -9, 3, -1}, + {-1, 6, -18, 114, 35, -10, 3, -1}, + {-1, 6, -19, 111, 41, -12, 3, -1}, + {-1, 6, -20, 107, 46, -13, 4, -1}, + {-1, 6, -21, 103, 52, -14, 4, -1}, + {-1, 6, -21, 99, 57, -16, 5, -1}, + {-1, 6, -21, 94, 63, -17, 5, -1}, + {-1, 6, -20, 89, 68, -18, 5, -1}, + {-1, 6, -20, 84, 73, -19, 6, -1}, + {-1, 6, -20, 79, 79, -20, 6, -1}, + {-1, 6, -19, 73, 84, -20, 6, -1}, + {-1, 5, -18, 68, 89, -20, 6, -1}, + {-1, 5, -17, 63, 94, -21, 6, -1}, + {-1, 5, -16, 57, 99, -21, 6, -1}, + {-1, 4, -14, 52, 103, -21, 6, -1}, + {-1, 4, -13, 46, 107, -20, 6, -1}, + {-1, 3, -12, 41, 111, -19, 6, -1}, + {-1, 3, -10, 35, 114, -18, 6, -1}, + {-1, 3, -9, 30, 118, -17, 5, -1}, + {0, 2, -8, 25, 120, -15, 5, -1}, + {0, 2, -6, 20, 123, -14, 4, -1}, + {0, 1, -5, 16, 125, -12, 4, -1}, + {0, 1, -4, 12, 126, -9, 3, -1}, + {0, 1, -2, 7, 127, -6, 2, -1}, + {0, 0, -1, 3, 128, -3, 1, 0} +}; + // Filters for factor of 2 downsampling. static const int16_t vp9_down2_symeven_half_filter[] = {56, 12, -3, -1}; static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3}; +static const interp_kernel *choose_interp_filter(int inlength, int outlength) { + int outlength16 = outlength * 16; + if (outlength16 >= inlength * 16) + return vp9_filteredinterp_filters1000; + else if (outlength16 >= inlength * 13) + return vp9_filteredinterp_filters875; + else if (outlength16 >= inlength * 11) + return vp9_filteredinterp_filters750; + else if (outlength16 >= inlength * 9) + return vp9_filteredinterp_filters625; + else + return vp9_filteredinterp_filters500; +} + static void interpolate(const uint8_t *const input, int inlength, uint8_t *output, int outlength) { const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) / @@ -81,6 +238,9 @@ static void interpolate(const uint8_t *const input, int inlength, int x, x1, x2, sum, k, int_pel, sub_pel; int64_t y; + const interp_kernel *interp_filters = + choose_interp_filter(inlength, outlength); + x = 0; y = offset; while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) { @@ -101,7 +261,7 @@ static void interpolate(const uint8_t *const input, int inlength, const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = vp9_filteredinterp_filters[sub_pel]; + filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) { const int pk = int_pel - INTERP_TAPS / 2 + 1 + k; @@ -116,7 +276,7 @@ static void interpolate(const uint8_t *const input, int inlength, const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = vp9_filteredinterp_filters[sub_pel]; + filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ? @@ -129,7 +289,7 @@ static void interpolate(const uint8_t *const input, int inlength, const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = vp9_filteredinterp_filters[sub_pel]; + filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k]; @@ -140,7 +300,7 @@ static void interpolate(const uint8_t *const input, int inlength, const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; - filter = vp9_filteredinterp_filters[sub_pel]; + filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= diff --git a/vp9/encoder/vp9_resize.h b/vp9/encoder/vp9_resize.h index c67595a3f..1818cd47e 100644 --- a/vp9/encoder/vp9_resize.h +++ b/vp9/encoder/vp9_resize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -12,6 +12,7 @@ #define VP9_ENCODER_VP9_RESIZE_H_ #include <stdio.h> +#include "vpx/vpx_integer.h" void vp9_resize_plane(const uint8_t *const input, int height, diff --git a/vp9/encoder/vp9_segmentation.h b/vp9/encoder/vp9_segmentation.h index 3c6eb7038..8238892e2 100644 --- a/vp9/encoder/vp9_segmentation.h +++ b/vp9/encoder/vp9_segmentation.h @@ -15,6 +15,10 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/encoder/vp9_onyx_int.h" +#ifdef __cplusplus +extern "C" { +#endif + void vp9_enable_segmentation(VP9_PTR ptr); void vp9_disable_segmentation(VP9_PTR ptr); @@ -45,4 +49,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi); void vp9_reset_segment_features(struct segmentation *seg); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_SEGMENTATION_H_ diff --git a/vp9/encoder/vp9_subexp.h b/vp9/encoder/vp9_subexp.h index 1cafd8775..ab5659bf7 100644 --- a/vp9/encoder/vp9_subexp.h +++ b/vp9/encoder/vp9_subexp.h @@ -12,6 +12,10 @@ #ifndef VP9_ENCODER_VP9_SUBEXP_H_ #define VP9_ENCODER_VP9_SUBEXP_H_ +#ifdef __cplusplus +extern "C" { +#endif + void vp9_compute_update_table(); @@ -32,4 +36,8 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct, vp9_prob upd, int b, int r); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_SUBEXP_H_ diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index c2eea0aaa..e822e4c64 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -60,7 +60,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, 16, 16, which_mv, - &xd->subpix, MV_PRECISION_Q3, x, y); + xd->interp_kernel, MV_PRECISION_Q3, x, y); vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_size, @@ -68,7 +68,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - &xd->subpix, mv_precision_uv, x, y); + xd->interp_kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_size, @@ -76,7 +76,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - &xd->subpix, mv_precision_uv, x, y); + xd->interp_kernel, mv_precision_uv, x, y); } void vp9_temporal_filter_apply_c(uint8_t *frame1, @@ -392,7 +392,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { const int num_frames_backward = distance; const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead) - (num_frames_backward + 1); - struct scale_factors sf; switch (blur_type) { @@ -408,7 +407,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { case 2: // Forward Blur - frames_to_blur_forward = num_frames_forward; if (frames_to_blur_forward >= max_frames) @@ -471,22 +469,24 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { strength, &sf); } -void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame, - const int group_boost) { +void vp9_configure_arnr_filter(VP9_COMP *cpi, + const unsigned int frames_to_arnr, + const int group_boost) { int half_gf_int; int frames_after_arf; int frames_bwd = cpi->oxcf.arnr_max_frames - 1; int frames_fwd = cpi->oxcf.arnr_max_frames - 1; int q; - // Define the arnr filter width for this group of frames: - // We only filter frames that lie within a distance of half - // the GF interval from the ARF frame. We also have to trap - // cases where the filter extends beyond the end of clip. - // Note: this_frame->frame has been updated in the loop - // so it now points at the ARF frame. + // Define the arnr filter width for this group of frames. We only + // filter frames that lie within a distance of half the GF interval + // from the ARF frame. We also have to trap cases where the filter + // extends beyond the end of the lookahead buffer. + // Note: frames_to_arnr parameter is the offset of the arnr + // frame from the current frame. half_gf_int = cpi->rc.baseline_gf_interval >> 1; - frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1); + frames_after_arf = vp9_lookahead_depth(cpi->lookahead) + - frames_to_arnr - 1; switch (cpi->oxcf.arnr_type) { case 1: // Backward filter diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h index c5f3b467e..3028d7884 100644 --- a/vp9/encoder/vp9_temporal_filter.h +++ b/vp9/encoder/vp9_temporal_filter.h @@ -11,8 +11,17 @@ #ifndef VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ #define VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ +#ifdef __cplusplus +extern "C" { +#endif + void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance); -void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame, - const int group_boost); +void vp9_configure_arnr_filter(VP9_COMP *cpi, + const unsigned int frames_to_arnr, + const int group_boost); + +#ifdef __cplusplus +} // extern "C" +#endif #endif // VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index b04e3fe30..8e646f669 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -301,7 +301,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, struct tokenize_b_args arg = {cpi, xd, t, mbmi->tx_size, cpi->mb.token_cache}; if (mbmi->skip_coeff) { if (!dry_run) - cm->counts.mbskip[ctx][1] += skip_inc; + cm->counts.skip[ctx][1] += skip_inc; reset_skip_context(xd, bsize); if (dry_run) *t = t_backup; @@ -309,7 +309,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, } if (!dry_run) { - cm->counts.mbskip[ctx][0] += skip_inc; + cm->counts.skip[ctx][0] += skip_inc; foreach_transformed_block(xd, bsize, tokenize_b, &arg); } else { foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 67e6c9d3d..ea86240be 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -16,6 +16,10 @@ #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_treewriter.h" +#ifdef __cplusplus +extern "C" { +#endif + void vp9_tokenize_initialize(); #define EOSB_TOKEN 127 // Not signalled, encoder only @@ -50,4 +54,8 @@ extern const int *vp9_dct_value_cost_ptr; */ extern const TOKENVALUE *vp9_dct_value_tokens_ptr; +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_TOKENIZE_H_ diff --git a/vp9/encoder/vp9_treewriter.h b/vp9/encoder/vp9_treewriter.h index 09f80b0ba..fedfbe937 100644 --- a/vp9/encoder/vp9_treewriter.h +++ b/vp9/encoder/vp9_treewriter.h @@ -13,6 +13,10 @@ #include "vp9/encoder/vp9_writer.h" +#ifdef __cplusplus +extern "C" { +#endif + #define vp9_cost_zero(prob) (vp9_prob_cost[prob]) #define vp9_cost_one(prob) vp9_cost_zero(vp9_complement(prob)) @@ -69,4 +73,8 @@ static INLINE void vp9_write_token(vp9_writer *w, const vp9_tree_index *tree, vp9_write_tree(w, tree, probs, token->value, token->len, 0); } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_TREEWRITER_H_ diff --git a/vp9/encoder/vp9_vaq.h b/vp9/encoder/vp9_vaq.h index c45c479de..c73114aeb 100644 --- a/vp9/encoder/vp9_vaq.h +++ b/vp9/encoder/vp9_vaq.h @@ -14,6 +14,10 @@ #include "vp9/encoder/vp9_onyx_int.h" +#ifdef __cplusplus +extern "C" { +#endif + unsigned int vp9_vaq_segment_id(int energy); double vp9_vaq_rdmult_ratio(int energy); double vp9_vaq_inv_q_ratio(int energy); @@ -23,4 +27,8 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi); int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_VAQ_H_ diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index 2ded97c55..3bc2091f8 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -12,7 +12,10 @@ #define VP9_ENCODER_VP9_VARIANCE_H_ #include "vpx/vpx_integer.h" -// #include "./vpx_config.h" + +#ifdef __cplusplus +extern "C" { +#endif void variance(const uint8_t *src_ptr, int source_stride, @@ -112,4 +115,8 @@ static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, ref += ref_stride; } } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_VARIANCE_H_ diff --git a/vp9/encoder/vp9_write_bit_buffer.h b/vp9/encoder/vp9_write_bit_buffer.h index dfed90370..5958b4806 100644 --- a/vp9/encoder/vp9_write_bit_buffer.h +++ b/vp9/encoder/vp9_write_bit_buffer.h @@ -15,6 +15,10 @@ #include "vpx/vpx_integer.h" +#ifdef __cplusplus +extern "C" { +#endif + struct vp9_write_bit_buffer { uint8_t *bit_buffer; size_t bit_offset; @@ -45,4 +49,8 @@ static void vp9_wb_write_literal(struct vp9_write_bit_buffer *wb, } +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_WRITE_BIT_BUFFER_H_ diff --git a/vp9/encoder/vp9_writer.h b/vp9/encoder/vp9_writer.h index 9cac7a84f..62f555c99 100644 --- a/vp9/encoder/vp9_writer.h +++ b/vp9/encoder/vp9_writer.h @@ -15,6 +15,10 @@ #include "vp9/common/vp9_prob.h" +#ifdef __cplusplus +extern "C" { +#endif + typedef struct { unsigned int lowvalue; unsigned int range; @@ -105,4 +109,8 @@ static void vp9_write_literal(vp9_writer *w, int data, int bits) { #define vp9_write_prob(w, v) vp9_write_literal((w), (v), 8) +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_VP9_WRITER_H_ diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c index d81b72bba..ea031fb07 100644 --- a/vp9/encoder/x86/vp9_dct_avx2.c +++ b/vp9/encoder/x86/vp9_dct_avx2.c @@ -163,7 +163,7 @@ static INLINE void transpose_4x4_avx2(__m128i *res) { res[3] = _mm_unpackhi_epi64(res[2], res[2]); } -void fdct4_1d_avx2(__m128i *in) { +void fdct4_avx2(__m128i *in) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); @@ -196,7 +196,7 @@ void fdct4_1d_avx2(__m128i *in) { transpose_4x4_avx2(in); } -void fadst4_1d_avx2(__m128i *in) { +void fadst4_avx2(__m128i *in) { const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9); const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9); const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9); @@ -250,20 +250,20 @@ void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output, load_buffer_4x4_avx2(input, in, stride); switch (tx_type) { case 0: // DCT_DCT - fdct4_1d_avx2(in); - fdct4_1d_avx2(in); + fdct4_avx2(in); + fdct4_avx2(in); break; case 1: // ADST_DCT - fadst4_1d_avx2(in); - fdct4_1d_avx2(in); + fadst4_avx2(in); + fdct4_avx2(in); break; case 2: // DCT_ADST - fdct4_1d_avx2(in); - fadst4_1d_avx2(in); + fdct4_avx2(in); + fadst4_avx2(in); break; case 3: // ADST_ADST - fadst4_1d_avx2(in); - fadst4_1d_avx2(in); + fadst4_avx2(in); + fadst4_avx2(in); break; default: assert(0); @@ -658,7 +658,7 @@ static INLINE void array_transpose_8x8_avx2(__m128i *in, __m128i *res) { // 07 17 27 37 47 57 67 77 } -void fdct8_1d_avx2(__m128i *in) { +void fdct8_avx2(__m128i *in) { // constants const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); @@ -798,7 +798,7 @@ void fdct8_1d_avx2(__m128i *in) { array_transpose_8x8_avx2(in, in); } -void fadst8_1d_avx2(__m128i *in) { +void fadst8_avx2(__m128i *in) { // Constants const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); @@ -1034,20 +1034,20 @@ void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output, load_buffer_8x8_avx2(input, in, stride); switch (tx_type) { case 0: // DCT_DCT - fdct8_1d_avx2(in); - fdct8_1d_avx2(in); + fdct8_avx2(in); + fdct8_avx2(in); break; case 1: // ADST_DCT - fadst8_1d_avx2(in); - fdct8_1d_avx2(in); + fadst8_avx2(in); + fdct8_avx2(in); break; case 2: // DCT_ADST - fdct8_1d_avx2(in); - fadst8_1d_avx2(in); + fdct8_avx2(in); + fadst8_avx2(in); break; case 3: // ADST_ADST - fadst8_1d_avx2(in); - fadst8_1d_avx2(in); + fadst8_avx2(in); + fadst8_avx2(in); break; default: assert(0); @@ -1216,7 +1216,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) { step1_6 = _mm_sub_epi16(in01, in14); step1_7 = _mm_sub_epi16(in00, in15); } - // Work on the first eight values; fdct8_1d(input, even_results); + // Work on the first eight values; fdct8(input, even_results); { // Add/substract const __m128i q0 = _mm_add_epi16(input0, input7); @@ -1730,7 +1730,7 @@ static INLINE void right_shift_16x16_avx2(__m128i *res0, __m128i *res1) { right_shift_8x8_avx2(res1 + 8, 2); } -void fdct16_1d_8col_avx2(__m128i *in) { +void fdct16_8col_avx2(__m128i *in) { // perform 16x16 1-D DCT for 8 columns __m128i i[8], s[8], p[8], t[8], u[16], v[16]; const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); @@ -2052,7 +2052,7 @@ void fdct16_1d_8col_avx2(__m128i *in) { in[15] = _mm_packs_epi32(v[14], v[15]); } -void fadst16_1d_8col_avx2(__m128i *in) { +void fadst16_8col_avx2(__m128i *in) { // perform 16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); @@ -2522,15 +2522,15 @@ void fadst16_1d_8col_avx2(__m128i *in) { in[15] = _mm_sub_epi16(kZero, s[1]); } -void fdct16_1d_avx2(__m128i *in0, __m128i *in1) { - fdct16_1d_8col_avx2(in0); - fdct16_1d_8col_avx2(in1); +void fdct16_avx2(__m128i *in0, __m128i *in1) { + fdct16_8col_avx2(in0); + fdct16_8col_avx2(in1); array_transpose_16x16_avx2(in0, in1); } -void fadst16_1d_avx2(__m128i *in0, __m128i *in1) { - fadst16_1d_8col_avx2(in0); - fadst16_1d_8col_avx2(in1); +void fadst16_avx2(__m128i *in0, __m128i *in1) { + fadst16_8col_avx2(in0); + fadst16_8col_avx2(in1); array_transpose_16x16_avx2(in0, in1); } @@ -2540,24 +2540,24 @@ void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output, load_buffer_16x16_avx2(input, in0, in1, stride); switch (tx_type) { case 0: // DCT_DCT - fdct16_1d_avx2(in0, in1); + fdct16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); - fdct16_1d_avx2(in0, in1); + fdct16_avx2(in0, in1); break; case 1: // ADST_DCT - fadst16_1d_avx2(in0, in1); + fadst16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); - fdct16_1d_avx2(in0, in1); + fdct16_avx2(in0, in1); break; case 2: // DCT_ADST - fdct16_1d_avx2(in0, in1); + fdct16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); - fadst16_1d_avx2(in0, in1); + fadst16_avx2(in0, in1); break; case 3: // ADST_ADST - fadst16_1d_avx2(in0, in1); + fadst16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); - fadst16_1d_avx2(in0, in1); + fadst16_avx2(in0, in1); break; default: assert(0); diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index 65431bdbf..c876cc273 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -161,7 +161,7 @@ static INLINE void transpose_4x4(__m128i *res) { res[3] = _mm_unpackhi_epi64(res[2], res[2]); } -void fdct4_1d_sse2(__m128i *in) { +void fdct4_sse2(__m128i *in) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); @@ -194,7 +194,7 @@ void fdct4_1d_sse2(__m128i *in) { transpose_4x4(in); } -void fadst4_1d_sse2(__m128i *in) { +void fadst4_sse2(__m128i *in) { const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9); const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9); const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9); @@ -248,20 +248,20 @@ void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output, load_buffer_4x4(input, in, stride); switch (tx_type) { case 0: // DCT_DCT - fdct4_1d_sse2(in); - fdct4_1d_sse2(in); + fdct4_sse2(in); + fdct4_sse2(in); break; case 1: // ADST_DCT - fadst4_1d_sse2(in); - fdct4_1d_sse2(in); + fadst4_sse2(in); + fdct4_sse2(in); break; case 2: // DCT_ADST - fdct4_1d_sse2(in); - fadst4_1d_sse2(in); + fdct4_sse2(in); + fadst4_sse2(in); break; case 3: // ADST_ADST - fadst4_1d_sse2(in); - fadst4_1d_sse2(in); + fadst4_sse2(in); + fadst4_sse2(in); break; default: assert(0); @@ -656,7 +656,7 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { // 07 17 27 37 47 57 67 77 } -void fdct8_1d_sse2(__m128i *in) { +void fdct8_sse2(__m128i *in) { // constants const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); @@ -796,7 +796,7 @@ void fdct8_1d_sse2(__m128i *in) { array_transpose_8x8(in, in); } -void fadst8_1d_sse2(__m128i *in) { +void fadst8_sse2(__m128i *in) { // Constants const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); @@ -1032,20 +1032,20 @@ void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output, load_buffer_8x8(input, in, stride); switch (tx_type) { case 0: // DCT_DCT - fdct8_1d_sse2(in); - fdct8_1d_sse2(in); + fdct8_sse2(in); + fdct8_sse2(in); break; case 1: // ADST_DCT - fadst8_1d_sse2(in); - fdct8_1d_sse2(in); + fadst8_sse2(in); + fdct8_sse2(in); break; case 2: // DCT_ADST - fdct8_1d_sse2(in); - fadst8_1d_sse2(in); + fdct8_sse2(in); + fadst8_sse2(in); break; case 3: // ADST_ADST - fadst8_1d_sse2(in); - fadst8_1d_sse2(in); + fadst8_sse2(in); + fadst8_sse2(in); break; default: assert(0); @@ -1214,7 +1214,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { step1_6 = _mm_sub_epi16(in01, in14); step1_7 = _mm_sub_epi16(in00, in15); } - // Work on the first eight values; fdct8_1d(input, even_results); + // Work on the first eight values; fdct8(input, even_results); { // Add/substract const __m128i q0 = _mm_add_epi16(input0, input7); @@ -1728,7 +1728,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) { right_shift_8x8(res1 + 8, 2); } -void fdct16_1d_8col(__m128i *in) { +void fdct16_8col(__m128i *in) { // perform 16x16 1-D DCT for 8 columns __m128i i[8], s[8], p[8], t[8], u[16], v[16]; const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); @@ -2050,7 +2050,7 @@ void fdct16_1d_8col(__m128i *in) { in[15] = _mm_packs_epi32(v[14], v[15]); } -void fadst16_1d_8col(__m128i *in) { +void fadst16_8col(__m128i *in) { // perform 16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); @@ -2520,15 +2520,15 @@ void fadst16_1d_8col(__m128i *in) { in[15] = _mm_sub_epi16(kZero, s[1]); } -void fdct16_1d_sse2(__m128i *in0, __m128i *in1) { - fdct16_1d_8col(in0); - fdct16_1d_8col(in1); +void fdct16_sse2(__m128i *in0, __m128i *in1) { + fdct16_8col(in0); + fdct16_8col(in1); array_transpose_16x16(in0, in1); } -void fadst16_1d_sse2(__m128i *in0, __m128i *in1) { - fadst16_1d_8col(in0); - fadst16_1d_8col(in1); +void fadst16_sse2(__m128i *in0, __m128i *in1) { + fadst16_8col(in0); + fadst16_8col(in1); array_transpose_16x16(in0, in1); } @@ -2538,24 +2538,24 @@ void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output, load_buffer_16x16(input, in0, in1, stride); switch (tx_type) { case 0: // DCT_DCT - fdct16_1d_sse2(in0, in1); + fdct16_sse2(in0, in1); right_shift_16x16(in0, in1); - fdct16_1d_sse2(in0, in1); + fdct16_sse2(in0, in1); break; case 1: // ADST_DCT - fadst16_1d_sse2(in0, in1); + fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); - fdct16_1d_sse2(in0, in1); + fdct16_sse2(in0, in1); break; case 2: // DCT_ADST - fdct16_1d_sse2(in0, in1); + fdct16_sse2(in0, in1); right_shift_16x16(in0, in1); - fadst16_1d_sse2(in0, in1); + fadst16_sse2(in0, in1); break; case 3: // ADST_ADST - fadst16_1d_sse2(in0, in1); + fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); - fadst16_1d_sse2(in0, in1); + fadst16_sse2(in0, in1); break; default: assert(0); diff --git a/vp9/encoder/x86/vp9_mcomp_x86.h b/vp9/encoder/x86/vp9_mcomp_x86.h index e1fcf40f2..c15039ad8 100644 --- a/vp9/encoder/x86/vp9_mcomp_x86.h +++ b/vp9/encoder/x86/vp9_mcomp_x86.h @@ -12,6 +12,10 @@ #ifndef VP9_ENCODER_X86_VP9_MCOMP_X86_H_ #define VP9_ENCODER_X86_VP9_MCOMP_X86_H_ +#ifdef __cplusplus +extern "C" { +#endif + #if HAVE_SSE3 #if !CONFIG_RUNTIME_CPU_DETECT @@ -36,5 +40,9 @@ #endif #endif +#ifdef __cplusplus +} // extern "C" +#endif + #endif // VP9_ENCODER_X86_VP9_MCOMP_X86_H_ |