diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_dct.c | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 566 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 19 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.h | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 175 | ||||
-rw-r--r-- | vp9/encoder/vp9_lookahead.c | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 68 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 123 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 52 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.h | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 44 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 95 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 88 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 28 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.h | 5 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_avx2.c | 74 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_sse2.c | 74 |
17 files changed, 861 insertions, 569 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 0f4a6bb63..a840b480a 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -997,7 +997,7 @@ static INLINE int half_round_shift(int input) { return rv; } -static void dct32_1d(const int *input, int *output, int round) { +static void fdct32(const int *input, int *output, int round) { int step[32]; // Stage 1 step[0] = input[0] + input[(32 - 1)]; @@ -1329,7 +1329,7 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; - dct32_1d(temp_in, temp_out, 0); + fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; } @@ -1339,13 +1339,13 @@ void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; - dct32_1d(temp_in, temp_out, 0); + fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; } } -// Note that although we use dct_32_round in dct32_1d computation flow, +// Note that although we use dct_32_round in dct32 computation flow, // this 2d fdct32x32 for rate-distortion optimization loop is operating // within 16 bits precision. void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { @@ -1357,7 +1357,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; - dct32_1d(temp_in, temp_out, 0); + fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) // TODO(cd): see quality impact of only doing // output[j * 32 + i] = (temp_out[j] + 1) >> 2; @@ -1370,7 +1370,7 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { int temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32]; - dct32_1d(temp_in, temp_out, 1); + fdct32(temp_in, temp_out, 1); for (j = 0; j < 32; ++j) out[j + i * 32] = temp_out[j]; } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 317ac9815..b97fd0293 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -380,8 +380,10 @@ static void select_in_frame_q_segment(VP9_COMP *cpi, segment = 0; } - complexity_metric = - clamp((int)((projected_rate * 64) / target_rate), 16, 255); + if (target_rate > 0) { + complexity_metric = + clamp((int)((projected_rate * 64) / target_rate), 16, 255); + } } // Fill in the entires in the segment map corresponding to this SB64 @@ -1029,131 +1031,171 @@ static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { } return 0; } - -// TODO(jingning) This currently serves as a test framework for non-RD mode -// decision. To be continued on optimizing the partition type decisions. -static void pick_partition_type(VP9_COMP *cpi, - const TileInfo *const tile, - MODE_INFO **mi_8x8, TOKENEXTRA **tp, - int mi_row, int mi_col, - BLOCK_SIZE bsize, int *rate, int64_t *dist, - int do_recon) { +static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, + BLOCK_SIZE bsize, int output_enabled) { + int i; VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; - const int mi_stride = cm->mode_info_stride; - const int num_8x8_subsize = (num_8x8_blocks_wide_lookup[bsize] >> 1); - int i; - PARTITION_TYPE partition = PARTITION_NONE; - BLOCK_SIZE subsize; - BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type; - int sub_rate[4] = {0}; - int64_t sub_dist[4] = {0}; - int mi_offset; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = xd->plane; + MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) - return; + const int mb_mode_index = ctx->best_mode_index; + int max_plane; - partition = partition_lookup[b_width_log2(bsize)][bs_type]; - subsize = get_subsize(bsize, partition); + max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1; + for (i = 0; i < max_plane; ++i) { + p[i].coeff = ctx->coeff_pbuf[i][1]; + p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; + pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; + p[i].eobs = ctx->eobs_pbuf[i][1]; + } + + for (i = max_plane; i < MAX_MB_PLANE; ++i) { + p[i].coeff = ctx->coeff_pbuf[i][2]; + p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; + pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; + p[i].eobs = ctx->eobs_pbuf[i][2]; + } + + x->skip = ctx->skip; + + if (frame_is_intra_only(cm)) { +#if CONFIG_INTERNAL_STATS + static const int kf_mode_index[] = { + THR_DC /*DC_PRED*/, + THR_V_PRED /*V_PRED*/, + THR_H_PRED /*H_PRED*/, + THR_D45_PRED /*D45_PRED*/, + THR_D135_PRED /*D135_PRED*/, + THR_D117_PRED /*D117_PRED*/, + THR_D153_PRED /*D153_PRED*/, + THR_D207_PRED /*D207_PRED*/, + THR_D63_PRED /*D63_PRED*/, + THR_TM /*TM_PRED*/, + }; + ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]]; +#endif + } else { + // Note how often each mode chosen as best + cpi->mode_chosen_counts[mb_mode_index]++; + if (is_inter_block(mbmi) && + (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) { + int_mv best_mv[2]; + for (i = 0; i < 1 + has_second_ref(mbmi); ++i) + best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int; + vp9_update_mv_count(cpi, x, best_mv); + } + + if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) { + const int ctx = vp9_get_pred_context_switchable_interp(xd); + ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; + } + } +} + +static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, + TOKENEXTRA **tp, int mi_row, int mi_col, + int output_enabled, BLOCK_SIZE bsize) { + MACROBLOCK *const x = &cpi->mb; if (bsize < BLOCK_8X8) { // When ab_index = 0 all sub-blocks are handled, so for ab_index != 0 // there is nothing to be done. - if (x->ab_index != 0) { - *rate = 0; - *dist = 0; + if (x->ab_index > 0) return; - } + } + set_offsets(cpi, tile, mi_row, mi_col, bsize); + update_state_rt(cpi, get_block_context(x, bsize), bsize, output_enabled); + + encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); + update_stats(cpi); + + (*tp)->token = EOSB_TOKEN; + (*tp)++; +} + +static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, + TOKENEXTRA **tp, int mi_row, int mi_col, + int output_enabled, BLOCK_SIZE bsize) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4; + int ctx; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + if (bsize >= BLOCK_8X8) { + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int idx_str = xd->mode_info_stride * mi_row + mi_col; + MODE_INFO ** mi_8x8 = cm->mi_grid_visible + idx_str; + ctx = partition_plane_context(cpi->above_seg_context, cpi->left_seg_context, + mi_row, mi_col, bsize); + subsize = mi_8x8[0]->mbmi.sb_type; + } else { - *(get_sb_partitioning(x, bsize)) = subsize; + ctx = 0; + subsize = BLOCK_4X4; } + partition = partition_lookup[bsl][subsize]; + switch (partition) { case PARTITION_NONE: - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, rate, dist, - bsize, get_block_context(x, bsize), INT64_MAX); + if (output_enabled && bsize >= BLOCK_8X8) + cm->counts.partition[ctx][PARTITION_NONE]++; + encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize); break; - case PARTITION_HORZ: + case PARTITION_VERT: + if (output_enabled) + cm->counts.partition[ctx][PARTITION_VERT]++; *get_sb_index(x, subsize) = 0; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0], - subsize, get_block_context(x, subsize), INT64_MAX); - if (bsize >= BLOCK_8X8 && mi_row + num_8x8_subsize < cm->mi_rows) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize); + if (mi_col + hbs < cm->mi_cols) { *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row + num_8x8_subsize, mi_col, - &sub_rate[1], &sub_dist[1], subsize, - get_block_context(x, subsize), INT64_MAX); + encode_b_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, + subsize); } - *rate = sub_rate[0] + sub_rate[1]; - *dist = sub_dist[0] + sub_dist[1]; break; - case PARTITION_VERT: + case PARTITION_HORZ: + if (output_enabled) + cm->counts.partition[ctx][PARTITION_HORZ]++; *get_sb_index(x, subsize) = 0; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sub_rate[0], &sub_dist[0], - subsize, get_block_context(x, subsize), INT64_MAX); - if (bsize >= BLOCK_8X8 && mi_col + num_8x8_subsize < cm->mi_cols) { - update_state(cpi, get_block_context(x, subsize), subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize); + encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize); + if (mi_row + hbs < cm->mi_rows) { *get_sb_index(x, subsize) = 1; - rd_pick_sb_modes(cpi, tile, mi_row, mi_col + num_8x8_subsize, - &sub_rate[1], &sub_dist[1], subsize, - get_block_context(x, subsize), INT64_MAX); + encode_b_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, + subsize); } - *rate = sub_rate[0] + sub_rate[1]; - *dist = sub_dist[1] + sub_dist[1]; break; case PARTITION_SPLIT: - *get_sb_index(x, subsize) = 0; - pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, subsize, - &sub_rate[0], &sub_dist[0], 0); - - if ((mi_col + num_8x8_subsize) < cm->mi_cols) { - *get_sb_index(x, subsize) = 1; - pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize, tp, - mi_row, mi_col + num_8x8_subsize, subsize, - &sub_rate[1], &sub_dist[1], 0); - } - - if ((mi_row + num_8x8_subsize) < cm->mi_rows) { - *get_sb_index(x, subsize) = 2; - pick_partition_type(cpi, tile, mi_8x8 + num_8x8_subsize * mi_stride, tp, - mi_row + num_8x8_subsize, mi_col, subsize, - &sub_rate[2], &sub_dist[2], 0); - } - - if ((mi_col + num_8x8_subsize) < cm->mi_cols && - (mi_row + num_8x8_subsize) < cm->mi_rows) { - *get_sb_index(x, subsize) = 3; - mi_offset = num_8x8_subsize * mi_stride + num_8x8_subsize; - pick_partition_type(cpi, tile, mi_8x8 + mi_offset, tp, - mi_row + num_8x8_subsize, mi_col + num_8x8_subsize, - subsize, &sub_rate[3], &sub_dist[3], 0); - } - - for (i = 0; i < 4; ++i) { - *rate += sub_rate[i]; - *dist += sub_dist[i]; - } + subsize = get_subsize(bsize, PARTITION_SPLIT); + if (output_enabled) + cm->counts.partition[ctx][PARTITION_SPLIT]++; + *get_sb_index(x, subsize) = 0; + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize); + *get_sb_index(x, subsize) = 1; + encode_sb_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, + subsize); + *get_sb_index(x, subsize) = 2; + encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, + subsize); + *get_sb_index(x, subsize) = 3; + encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, + subsize); break; default: - assert(0); + assert("Invalid partition type."); } - if (do_recon) { - int output_enabled = (bsize == BLOCK_64X64); - - // Check the projected output rate for this SB against it's target - // and and if necessary apply a Q delta using segmentation to get - // closer to the target. - if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - select_in_frame_q_segment(cpi, mi_row, mi_col, - output_enabled, *rate); - } - - encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize); - } + if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) + update_partition_context(cpi->above_seg_context, cpi->left_seg_context, + mi_row, mi_col, subsize, bsize); } static void rd_use_partition(VP9_COMP *cpi, @@ -1444,15 +1486,19 @@ static void rd_use_partition(VP9_COMP *cpi, } static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { - BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, - BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, - BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 + BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, + BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, + BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, + BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, + BLOCK_16X16 }; static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { - BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, - BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, - BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 + BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, + BLOCK_16X16, BLOCK_32X32, BLOCK_32X32, + BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, + BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, + BLOCK_64X64 }; // Look at all the mode_info entries for blocks that are part of this @@ -1538,9 +1584,11 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, } } - // Give a bit of leaway either side of the observed min and max - *min_block_size = min_partition_size[*min_block_size]; - *max_block_size = max_partition_size[*max_block_size]; + // adjust observed min and max + if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { + *min_block_size = min_partition_size[*min_block_size]; + *max_block_size = max_partition_size[*max_block_size]; + } // Check border cases where max and min from neighbours may not be legal. *max_block_size = find_partition_size(*max_block_size, @@ -1996,34 +2044,6 @@ static void rd_pick_reference_frame(VP9_COMP *cpi, const TileInfo *const tile, restore_context(cpi, mi_row, mi_col, a, l, sa, sl, BLOCK_64X64); } -static void encode_sb_row_rt(VP9_COMP *cpi, const TileInfo *const tile, - int mi_row, TOKENEXTRA **tp) { - VP9_COMMON *const cm = &cpi->common; - int mi_col; - - cpi->sf.always_this_block_size = BLOCK_8X8; - - // Initialize the left context for the new SB row - vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); - vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); - - // Code each SB in the row - for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; - mi_col += MI_BLOCK_SIZE) { - int dummy_rate; - int64_t dummy_dist; - const int idx_str = cm->mode_info_stride * mi_row + mi_col; - MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; - - vp9_zero(cpi->mb.pred_mv); - - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); - set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col); - pick_partition_type(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rate, &dummy_dist, 1); - } -} - static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; @@ -2250,11 +2270,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += 8) -#if 1 encode_sb_row(cpi, &tile, mi_row, &tp); -#else - encode_sb_row_rt(cpi, &tile, mi_row, &tp); -#endif cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); @@ -2433,6 +2449,264 @@ static void select_tx_mode(VP9_COMP *cpi) { } } } +// Start RTC Exploration +typedef enum { + BOTH_ZERO = 0, + ZERO_PLUS_PREDICTED = 1, + BOTH_PREDICTED = 2, + NEW_PLUS_NON_INTRA = 3, + BOTH_NEW = 4, + INTRA_PLUS_NON_INTRA = 5, + BOTH_INTRA = 6, + INVALID_CASE = 9 +} motion_vector_context; + +static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, + MB_PREDICTION_MODE mode, int mi_row, int mi_col) { + mbmi->interp_filter = EIGHTTAP; + mbmi->mode = mode; + mbmi->mv[0].as_int = 0; + mbmi->mv[1].as_int = 0; + if (mode < NEARESTMV) { + mbmi->ref_frame[0] = INTRA_FRAME; + } else { + mbmi->ref_frame[0] = LAST_FRAME; + } + + mbmi->ref_frame[1] = INTRA_FRAME; + mbmi->tx_size = max_txsize_lookup[bsize]; + mbmi->uv_mode = mode; + mbmi->skip_coeff = 0; + mbmi->sb_type = bsize; + mbmi->segment_id = 0; +} +static inline int get_block_row(int b32i, int b16i, int b8i) { + return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1); +} +static inline int get_block_col(int b32i, int b16i, int b8i) { + return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1); +} +static void rtc_use_partition(VP9_COMP *cpi, + const TileInfo *const tile, + MODE_INFO **mi_8x8, + TOKENEXTRA **tp, int mi_row, int mi_col, + BLOCK_SIZE bsize, int *rate, int64_t *dist, + int do_recon) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int mis = cm->mode_info_stride; + int mi_width = num_8x8_blocks_wide_lookup[cpi->sf.always_this_block_size]; + int mi_height = num_8x8_blocks_high_lookup[cpi->sf.always_this_block_size]; + int i, j; + int chosen_rate = INT_MAX; + int64_t chosen_dist = INT_MAX; + MB_PREDICTION_MODE mode = DC_PRED; + int row8x8_remaining = tile->mi_row_end - mi_row; + int col8x8_remaining = tile->mi_col_end - mi_col; + int b32i; + x->fast_ms = 0; + x->subblock_ref = 0; + for (b32i = 0; b32i < 4; b32i++) { + int b16i; + for (b16i = 0; b16i < 4; b16i++) { + int b8i; + int block_row = get_block_row(b32i, b16i, 0); + int block_col = get_block_col(b32i, b16i, 0); + int index = block_row * mis + block_col; + int rate; + int64_t dist; + + int_mv frame_nearest_mv[MAX_REF_FRAMES]; + int_mv frame_near_mv[MAX_REF_FRAMES]; + struct buf_2d yv12_mb[MAX_REF_FRAMES][MAX_MB_PLANE]; + + // Find a partition size that fits + bsize = find_partition_size(cpi->sf.always_this_block_size, + (row8x8_remaining - block_row), + (col8x8_remaining - block_col), + &mi_height, &mi_width); + mi_8x8[index] = mi_8x8[0] + index; + + set_mi_row_col(xd, tile, mi_row + block_row, mi_height, + mi_col + block_col, mi_width, cm->mi_rows, cm->mi_cols); + + xd->mi_8x8 = mi_8x8 + index; + + if (cm->frame_type != KEY_FRAME) { + set_offsets(cpi, tile, mi_row + block_row, mi_col + block_col, bsize); + + vp9_pick_inter_mode(cpi, x, tile, + mi_row + block_row, mi_col + block_col, + &rate, &dist, cpi->sf.always_this_block_size); + } else { + set_mode_info(&mi_8x8[index]->mbmi, bsize, mode, + mi_row + block_row, mi_col + block_col); + vp9_setup_buffer_inter(cpi, x, tile, + LAST_FRAME, cpi->sf.always_this_block_size, + mi_row + block_row, mi_col + block_col, + frame_nearest_mv, frame_near_mv, yv12_mb); + } + + for (j = 0; j < mi_height; j++) + for (i = 0; i < mi_width; i++) + if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > i + && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > j) { + mi_8x8[index+ i + j * mis] = mi_8x8[index]; + } + + for (b8i = 0; b8i < 4; b8i++) { + } + } + } + encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64); + + *rate = chosen_rate; + *dist = chosen_dist; +} + +static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile, + int mi_row, TOKENEXTRA **tp) { + VP9_COMMON * const cm = &cpi->common; + int mi_col; + + // Initialize the left context for the new SB row + vpx_memset(&cpi->left_context, 0, sizeof(cpi->left_context)); + vpx_memset(cpi->left_seg_context, 0, sizeof(cpi->left_seg_context)); + + // Code each SB in the row + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; + mi_col += MI_BLOCK_SIZE) { + int dummy_rate; + int64_t dummy_dist; + + const int idx_str = cm->mode_info_stride * mi_row + mi_col; + MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; + + cpi->mb.source_variance = UINT_MAX; + set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col); + rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rate, &dummy_dist, 1); + } +} + + +static void encode_rtc_frame_internal(VP9_COMP *cpi) { + int mi_row; + MACROBLOCK * const x = &cpi->mb; + VP9_COMMON * const cm = &cpi->common; + MACROBLOCKD * const xd = &x->e_mbd; + +// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", +// cpi->common.current_video_frame, cpi->common.show_frame, +// cm->frame_type); + +// debug output +#if DBG_PRNT_SEGMAP + { + FILE *statsfile; + statsfile = fopen("segmap2.stt", "a"); + fprintf(statsfile, "\n"); + fclose(statsfile); + } +#endif + + vp9_zero(cm->counts.switchable_interp); + vp9_zero(cpi->tx_stepdown_count); + + xd->mi_8x8 = cm->mi_grid_visible; + // required for vp9_frame_init_quantizer + xd->mi_8x8[0] = cm->mi; + + xd->last_mi = cm->prev_mi; + + vp9_zero(cpi->common.counts.mv); + vp9_zero(cpi->coef_counts); + vp9_zero(cm->counts.eob_branch); + + cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 + && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; + switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); + + vp9_frame_init_quantizer(cpi); + + vp9_initialize_rd_consts(cpi); + vp9_initialize_me_consts(cpi, cm->base_qindex); + switch_tx_mode(cpi); + cpi->sf.always_this_block_size = BLOCK_16X16; + + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { + // Initialize encode frame context. + init_encode_frame_mb_context(cpi); + + // Build a frame level activity map + build_activity_map(cpi); + } + + // Re-initialize encode frame context. + init_encode_frame_mb_context(cpi); + + vp9_zero(cpi->rd_comp_pred_diff); + vp9_zero(cpi->rd_filter_diff); + vp9_zero(cpi->rd_tx_select_diff); + vp9_zero(cpi->rd_tx_select_threshes); + + set_prev_mi(cm); + + { + struct vpx_usec_timer emr_timer; + vpx_usec_timer_start(&emr_timer); + + { + // Take tiles into account and give start/end MB + int tile_col, tile_row; + TOKENEXTRA *tp = cpi->tok; + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + + for (tile_row = 0; tile_row < tile_rows; tile_row++) { + for (tile_col = 0; tile_col < tile_cols; tile_col++) { + TileInfo tile; + TOKENEXTRA *tp_old = tp; + + // For each row of SBs in the frame + vp9_tile_init(&tile, cm, tile_row, tile_col); + for (mi_row = tile.mi_row_start; + mi_row < tile.mi_row_end; mi_row += 8) + encode_rtc_sb_row(cpi, &tile, mi_row, &tp); + + cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); + assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); + } + } + } + + vpx_usec_timer_mark(&emr_timer); + cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); + } + + if (cpi->sf.skip_encode_sb) { + int j; + unsigned int intra_count = 0, inter_count = 0; + for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { + intra_count += cm->counts.intra_inter[j][0]; + inter_count += cm->counts.intra_inter[j][1]; + } + cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count); + cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME); + cpi->sf.skip_encode_frame &= cm->show_frame; + } else { + cpi->sf.skip_encode_frame = 0; + } + +#if 0 + // Keep record of the total distortion this time around for future use + cpi->last_frame_distortion = cpi->frame_distortion; +#endif +} +// end RTC play code + void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -2512,7 +2786,11 @@ void vp9_encode_frame(VP9_COMP *cpi) { select_tx_mode(cpi); cm->reference_mode = reference_mode; cm->interp_filter = interp_filter; - encode_frame_internal(cpi); + + if (cpi->compressor_speed == 3) + encode_rtc_frame_internal(cpi); + else + encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs); @@ -2590,7 +2868,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { } } } else { - encode_frame_internal(cpi); + encode_rtc_frame_internal(cpi); } } @@ -2666,7 +2944,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 && - (cpi->oxcf.aq_mode != COMPLEXITY_AQ); + (cpi->oxcf.aq_mode != COMPLEXITY_AQ) && + cpi->compressor_speed != 3; x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; @@ -2681,7 +2960,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, vp9_update_zbin_extra(cpi, x); } } else { - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Adjust the zbin based on this MB rate. diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index d1485307d..8ff23c79a 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -25,24 +25,6 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_tokenize.h" -void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERP_FILTER filter, - VP9_COMMON *cm) { - if (xd->mi_8x8 && xd->mi_8x8[0]) { - MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; - - set_ref_ptrs(cm, xd, mbmi->ref_frame[0] - LAST_FRAME, - mbmi->ref_frame[1] - LAST_FRAME); - - } else { - set_ref_ptrs(cm, xd, -1, -1); - } - - xd->subpix.filter_x = xd->subpix.filter_y = - vp9_get_interp_kernel(filter == SWITCHABLE ? EIGHTTAP : filter); - - assert(((intptr_t)xd->subpix.filter_x & 0xff) == 0); -} - void vp9_subtract_block_c(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, @@ -356,7 +338,6 @@ static void optimize_init_b(int plane, BLOCK_SIZE bsize, pd->above_context, pd->left_context, num_4x4_w, num_4x4_h); } - void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args* const args = arg; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index c728efd49..9f6c9f069 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -48,8 +48,7 @@ void vp9_encode_intra_block_y(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_encode_intra_block_uv(MACROBLOCK *x, BLOCK_SIZE bsize); int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred); -void vp9_setup_interp_filters(MACROBLOCKD *xd, INTERP_FILTER filter, - VP9_COMMON *cm); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 395ce2008..af9fa1ba7 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -447,6 +447,16 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } } +static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) { + if (2 * mb_col + 1 < cm->mi_cols) { + return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_16X16 + : BLOCK_16X8; + } else { + return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_8X16 + : BLOCK_8X8; + } +} + void vp9_first_pass(VP9_COMP *cpi) { int mb_row, mb_col; MACROBLOCK *const x = &cpi->mb; @@ -481,10 +491,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int sum_in_vectors = 0; uint32_t lastmv_as_int = 0; struct twopass_rc *const twopass = &cpi->twopass; - - int_mv zero_ref_mv; - - zero_ref_mv.as_int = 0; + const MV zero_mv = {0, 0}; vp9_clear_system_state(); // __asm emms; @@ -493,8 +500,7 @@ void vp9_first_pass(VP9_COMP *cpi) { setup_dst_planes(xd, new_yv12, 0, 0); xd->mi_8x8 = cm->mi_grid_visible; - // required for vp9_frame_init_quantizer - xd->mi_8x8[0] = cm->mi; + xd->mi_8x8[0] = cm->mi; // required for vp9_frame_init_quantizer setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); @@ -508,14 +514,8 @@ void vp9_first_pass(VP9_COMP *cpi) { } x->skip_recode = 0; - - // Initialise the MV cost table to the defaults - // if( cm->current_video_frame == 0) - // if ( 0 ) - { - vp9_init_mv_probs(cm); - vp9_initialize_rd_consts(cpi); - } + vp9_init_mv_probs(cm); + vp9_initialize_rd_consts(cpi); // tiling is ignored in the first pass vp9_tile_init(&tile, cm, 0, 0); @@ -540,8 +540,9 @@ void vp9_first_pass(VP9_COMP *cpi) { // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { int this_error; - int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); + const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); double error_weight = 1.0; + const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col); vp9_clear_system_state(); // __asm emms; @@ -549,30 +550,15 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); - - if (mb_col * 2 + 1 < cm->mi_cols) { - if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X16; - } else { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_16X8; - } - } else { - if (mb_row * 2 + 1 < cm->mi_rows) { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X16; - } else { - xd->mi_8x8[0]->mbmi.sb_type = BLOCK_8X8; - } - } + xd->mi_8x8[0]->mbmi.sb_type = bsize; xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, - mb_row << 1, - num_8x8_blocks_high_lookup[xd->mi_8x8[0]->mbmi.sb_type], - mb_col << 1, - num_8x8_blocks_wide_lookup[xd->mi_8x8[0]->mbmi.sb_type], + mb_row << 1, num_8x8_blocks_high_lookup[bsize], + mb_col << 1, num_8x8_blocks_wide_lookup[bsize], cm->mi_rows, cm->mi_cols); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - int energy = vp9_block_energy(cpi, x, xd->mi_8x8[0]->mbmi.sb_type); + const int energy = vp9_block_energy(cpi, x, bsize); error_weight = vp9_vaq_inv_q_ratio(energy); } @@ -598,8 +584,7 @@ void vp9_first_pass(VP9_COMP *cpi) { // Set up limit values for motion vectors to prevent them extending // outside the UMV borders. x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) - + BORDER_MV_PIXELS_B16; + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16; // Other than for the first frame do a motion search if (cm->current_video_frame > 0) { @@ -624,7 +609,7 @@ void vp9_first_pass(VP9_COMP *cpi) { // based search as well. if (best_ref_mv.as_int) { tmp_err = INT_MAX; - first_pass_motion_search(cpi, x, &zero_ref_mv.as_mv, &tmp_mv.as_mv, + first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, &tmp_err); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); // __asm emms; @@ -645,17 +630,15 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset; gf_motion_error = zz_motion_search(cpi, x); - first_pass_motion_search(cpi, x, &zero_ref_mv.as_mv, &tmp_mv.as_mv, + first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, &gf_motion_error); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); // __asm emms; gf_motion_error *= error_weight; } - if ((gf_motion_error < motion_error) && - (gf_motion_error < this_error)) { + if (gf_motion_error < motion_error && gf_motion_error < this_error) second_ref_count++; - } // Reset to last frame as reference buffer xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; @@ -692,9 +675,8 @@ void vp9_first_pass(VP9_COMP *cpi) { xd->mi_8x8[0]->mbmi.tx_size = TX_4X4; xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME; xd->mi_8x8[0]->mbmi.ref_frame[1] = NONE; - vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, - xd->mi_8x8[0]->mbmi.sb_type); - vp9_encode_sby(x, xd->mi_8x8[0]->mbmi.sb_type); + vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); + vp9_encode_sby(x, bsize); sum_mvr += mv.as_mv.row; sum_mvr_abs += abs(mv.as_mv.row); sum_mvc += mv.as_mv.col; @@ -784,13 +766,11 @@ void vp9_first_pass(VP9_COMP *cpi) { fps.mvr_abs = (double)sum_mvr_abs / mvcount; fps.MVc = (double)sum_mvc / mvcount; fps.mvc_abs = (double)sum_mvc_abs / mvcount; - fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / mvcount)) / - mvcount; - fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) / - mvcount; + fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / mvcount)) / mvcount; + fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) / mvcount; fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2); fps.new_mv_count = new_mv_count; - fps.pcnt_motion = (double)mvcount / cpi->common.MBs; + fps.pcnt_motion = (double)mvcount / cm->MBs; } else { fps.MVr = 0.0; fps.mvr_abs = 0.0; @@ -918,8 +898,7 @@ static double calc_correction_factor(double err_per_mb, return fclamp(pow(error_term, power_term), 0.05, 5.0); } -static int estimate_max_q(VP9_COMP *cpi, - FIRSTPASS_STATS *fpstats, +static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh) { int q; const int num_mbs = cpi->common.MBs; @@ -1093,12 +1072,12 @@ void vp9_end_second_pass(VP9_COMP *cpi) { // This function gives and estimate of how badly we believe // the prediction quality is decaying from frame to frame. -static double get_prediction_decay_rate(VP9_COMP *cpi, - FIRSTPASS_STATS *next_frame) { +static double get_prediction_decay_rate(const VP9_COMMON *cm, + const FIRSTPASS_STATS *next_frame) { // Look at the observed drop in prediction quality between the last frame // and the GF buffer (which contains an older frame). const double mb_sr_err_diff = (next_frame->sr_coded_error - - next_frame->coded_error) / cpi->common.MBs; + next_frame->coded_error) / cm->MBs; const double second_ref_decay = mb_sr_err_diff <= 512.0 ? fclamp(pow(1.0 - (mb_sr_err_diff / 512.0), 0.5), 0.85, 1.0) : 0.85; @@ -1126,7 +1105,6 @@ static int detect_transition_to_still( int j; FIRSTPASS_STATS *position = cpi->twopass.stats_in; FIRSTPASS_STATS tmp_next_frame; - double zz_inter; // Look ahead a few frames to see if static condition // persists... @@ -1134,11 +1112,10 @@ static int detect_transition_to_still( if (EOF == input_stats(&cpi->twopass, &tmp_next_frame)) break; - zz_inter = (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion); - if (zz_inter < 0.999) + if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999) break; } - // Reset file position + reset_fpf_position(&cpi->twopass, position); // Only if it does do we signal a transition to still @@ -1152,14 +1129,14 @@ static int detect_transition_to_still( // This function detects a flash through the high relative pcnt_second_ref // score in the frame following a flash frame. The offset passed in should // reflect this -static int detect_flash(VP9_COMP *cpi, int offset) { +static int detect_flash(const struct twopass_rc *twopass, int offset) { FIRSTPASS_STATS next_frame; int flash_detected = 0; // Read the frame data. // The return is FALSE (no flash detected) if not a valid frame - if (read_frame_stats(&cpi->twopass, &next_frame, offset) != EOF) { + if (read_frame_stats(twopass, &next_frame, offset) != EOF) { // What we are looking for here is a situation where there is a // brief break in prediction (such as a flash) but subsequent frames // are reasonably well predicted by an earlier (pre flash) frame. @@ -1188,16 +1165,15 @@ static void accumulate_frame_motion_stats( // Accumulate Motion In/Out of frame stats *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct; *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct; - *abs_mv_in_out_accumulator += - fabs(this_frame->mv_in_out_count * motion_pct); + *abs_mv_in_out_accumulator += fabs(this_frame->mv_in_out_count * motion_pct); // Accumulate a measure of how uniform (or conversely how random) // the motion field is. (A ratio of absmv / mv) if (motion_pct > 0.05) { - double this_frame_mvr_ratio = fabs(this_frame->mvr_abs) / + const double this_frame_mvr_ratio = fabs(this_frame->mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr)); - double this_frame_mvc_ratio = fabs(this_frame->mvc_abs) / + const double this_frame_mvc_ratio = fabs(this_frame->mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVc)); *mv_ratio_accumulator += (this_frame_mvr_ratio < this_frame->mvr_abs) @@ -1240,7 +1216,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, int f_frames, int b_frames, int *f_boost, int *b_boost) { FIRSTPASS_STATS this_frame; - + struct twopass_rc *const twopass = &cpi->twopass; int i; double boost_score = 0.0; double mv_ratio_accumulator = 0.0; @@ -1253,7 +1229,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, // Search forward from the proposed arf/next gf position for (i = 0; i < f_frames; i++) { - if (read_frame_stats(&cpi->twopass, &this_frame, (i + offset)) == EOF) + if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF) break; // Update the motion related elements to the boost calculation @@ -1264,12 +1240,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, // We want to discount the flash frame itself and the recovery // frame that follows as both will have poor scores. - flash_detected = detect_flash(cpi, (i + offset)) || - detect_flash(cpi, (i + offset + 1)); + flash_detected = detect_flash(twopass, i + offset) || + detect_flash(twopass, i + offset + 1); // Cumulative effect of prediction quality decay if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } @@ -1290,7 +1266,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, // Search backward towards last gf position for (i = -1; i >= -b_frames; i--) { - if (read_frame_stats(&cpi->twopass, &this_frame, (i + offset)) == EOF) + if (read_frame_stats(twopass, &this_frame, (i + offset)) == EOF) break; // Update the motion related elements to the boost calculation @@ -1301,12 +1277,12 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, // We want to discount the the flash frame itself and the recovery // frame that follows as both will have poor scores. - flash_detected = detect_flash(cpi, (i + offset)) || - detect_flash(cpi, (i + offset + 1)); + flash_detected = detect_flash(twopass, i + offset) || + detect_flash(twopass, i + offset + 1); // Cumulative effect of prediction quality decay if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, &this_frame); + decay_accumulator *= get_prediction_decay_rate(&cpi->common, &this_frame); decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; } @@ -1466,6 +1442,7 @@ void define_fixed_arf_period(VP9_COMP *cpi) { static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { FIRSTPASS_STATS next_frame = { 0 }; FIRSTPASS_STATS *start_pos; + struct twopass_rc *const twopass = &cpi->twopass; int i; double boost_score = 0.0; double old_boost_score = 0.0; @@ -1486,8 +1463,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { double mv_ratio_accumulator_thresh; int max_bits = frame_max_bits(cpi); // Max for a single frame - unsigned int allow_alt_ref = - cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; + unsigned int allow_alt_ref = cpi->oxcf.play_alternate && + cpi->oxcf.lag_in_frames; int f_boost = 0; int b_boost = 0; @@ -1495,11 +1472,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { int active_max_gf_interval; RATE_CONTROL *const rc = &cpi->rc; - cpi->twopass.gf_group_bits = 0; + twopass->gf_group_bits = 0; vp9_clear_system_state(); // __asm emms; - start_pos = cpi->twopass.stats_in; + start_pos = twopass->stats_in; // Load stats for the current frame. mod_frame_err = calculate_modified_err(cpi, this_frame); @@ -1530,20 +1507,19 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { active_max_gf_interval = rc->max_gf_interval; i = 0; - while ((i < cpi->twopass.static_scene_max_gf_interval) && - (i < rc->frames_to_key)) { + while (i < twopass->static_scene_max_gf_interval && i < rc->frames_to_key) { i++; // Increment the loop counter // Accumulate error score of frames in this gf group mod_frame_err = calculate_modified_err(cpi, this_frame); gf_group_err += mod_frame_err; - if (EOF == input_stats(&cpi->twopass, &next_frame)) + if (EOF == input_stats(twopass, &next_frame)) break; // Test for the case where there is a brief flash but the prediction // quality back to an earlier frame is then restored. - flash_detected = detect_flash(cpi, 0); + flash_detected = detect_flash(twopass, 0); // Update the motion related elements to the boost calculation accumulate_frame_motion_stats(&next_frame, @@ -1554,14 +1530,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Cumulative effect of prediction quality decay if (!flash_detected) { last_loop_decay_rate = loop_decay_rate; - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); decay_accumulator = decay_accumulator * loop_decay_rate; // Monitor for static sections. if ((next_frame.pcnt_inter - next_frame.pcnt_motion) < zero_motion_accumulator) { - zero_motion_accumulator = - (next_frame.pcnt_inter - next_frame.pcnt_motion); + zero_motion_accumulator = next_frame.pcnt_inter - + next_frame.pcnt_motion; } // Break clause to detect very still sections after motion @@ -1599,14 +1575,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { old_boost_score = boost_score; } - cpi->twopass.gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0); + twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0); // Don't allow a gf too near the next kf if ((rc->frames_to_key - i) < MIN_GF_INTERVAL) { while (i < (rc->frames_to_key + !rc->next_key_frame_forced)) { i++; - if (EOF == input_stats(&cpi->twopass, this_frame)) + if (EOF == input_stats(twopass, this_frame)) break; if (i < rc->frames_to_key) { @@ -2069,7 +2045,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // How fast is prediction quality decaying - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); // We want to know something about the recent past... rather than // as used elsewhere where we are concened with decay in prediction @@ -2203,8 +2179,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { r = RMAX; // How fast is prediction quality decaying - if (!detect_flash(cpi, 0)) { - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); + if (!detect_flash(twopass, 0)) { + loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame); decay_accumulator *= loop_decay_rate; decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR ? MIN_DECAY_FACTOR : decay_accumulator; @@ -2333,6 +2309,7 @@ void vp9_get_svc_params(VP9_COMP *cpi) { (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % cpi->key_frame_frequency == 0))) { cm->frame_type = KEY_FRAME; + cpi->rc.source_alt_ref_active = 0; } else { cm->frame_type = INTER_FRAME; } @@ -2340,6 +2317,9 @@ void vp9_get_svc_params(VP9_COMP *cpi) { cpi->rc.baseline_gf_interval = INT_MAX; } +// Use this macro to turn on/off use of alt-refs in one-pass mode. +#define USE_ALTREF_FOR_ONE_PASS 1 + void vp9_get_one_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (!cpi->refresh_alt_ref_frame && @@ -2351,13 +2331,20 @@ void vp9_get_one_pass_params(VP9_COMP *cpi) { cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && cpi->rc.frames_to_key == 0; cpi->rc.frames_to_key = cpi->key_frame_frequency; - cpi->rc.kf_boost = 300; + cpi->rc.kf_boost = 2000; + cpi->rc.source_alt_ref_active = 0; } else { cm->frame_type = INTER_FRAME; } if (cpi->rc.frames_till_gf_update_due == 0) { + cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval; + // NOTE: frames_till_gf_update_due must be <= frames_to_key. + if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key) + cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key; cpi->refresh_golden_frame = 1; + cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS; + cpi->rc.gfu_boost = 1000; } } @@ -2371,7 +2358,8 @@ void vp9_get_one_pass_cbr_params(VP9_COMP *cpi) { cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && cpi->rc.frames_to_key == 0; cpi->rc.frames_to_key = cpi->key_frame_frequency; - cpi->rc.kf_boost = 300; + cpi->rc.kf_boost = 2000; + cpi->rc.source_alt_ref_active = 0; } else { cm->frame_type = INTER_FRAME; } @@ -2405,12 +2393,13 @@ void vp9_get_second_pass_params(VP9_COMP *cpi) { double this_frame_intra_error; double this_frame_coded_error; + if (!cpi->twopass.stats_in) + return; if (cpi->refresh_alt_ref_frame) { cpi->common.frame_type = INTER_FRAME; + rc->per_frame_bandwidth = cpi->twopass.gf_bits; return; } - if (!cpi->twopass.stats_in) - return; vp9_clear_system_state(); diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index ee73ff15a..e6e59c05a 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -173,7 +173,6 @@ struct lookahead_entry * vp9_lookahead_peek(struct lookahead_ctx *ctx, int index) { struct lookahead_entry *buf = NULL; - assert(index < (int)ctx->max_sz); if (index < (int)ctx->sz) { index += ctx->read_idx; if (index >= (int)ctx->max_sz) diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 88023513a..6bc88ec44 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -626,7 +626,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = 1; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; @@ -663,7 +663,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->disable_filter_search_var_thresh = 100; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = 1; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; @@ -698,7 +698,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->disable_filter_search_var_thresh = 200; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = 1; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; @@ -797,7 +797,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; - sf->auto_min_max_partition_size = 1; + sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; @@ -839,6 +839,9 @@ static void set_rt_speed_feature(VP9_COMMON *cm, if (speed >= 5) { int i; sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->auto_min_max_partition_size = frame_is_intra_only(cm) ? + RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX; + sf->subpel_force_stop = 1; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_DC_H_V; sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY; @@ -867,6 +870,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->recode_loop = 1; sf->subpel_search_method = SUBPEL_TREE; sf->subpel_iters_per_step = 2; + sf->subpel_force_stop = 0; sf->optimize_coefficients = !cpi->oxcf.lossless; sf->reduce_first_step_size = 0; sf->auto_mv_step_size = 0; @@ -882,7 +886,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_one_partition_size_always = 0; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; - sf->auto_min_max_partition_size = 0; + sf->auto_min_max_partition_size = NOT_IN_USE; sf->max_partition_size = BLOCK_64X64; sf->min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; @@ -1258,6 +1262,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->pass = 2; cpi->compressor_speed = 0; break; + + case MODE_REALTIME: + cpi->pass = 0; + cpi->compressor_speed = 3; + break; } cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; @@ -2541,7 +2550,10 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vpx_usec_timer_start(&timer); - vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick); + if (cpi->compressor_speed == 3) + lf->filter_level = 4; + else + vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.use_fast_lpf_pick); vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); @@ -2730,7 +2742,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if (cpi->sf.recode_loop != 0) { vp9_save_coding_context(cpi); cpi->dummy_packing = 1; - vp9_pack_bitstream(cpi, dest, size); + if (cpi->compressor_speed != 3) + vp9_pack_bitstream(cpi, dest, size); + cpi->rc.projected_frame_size = (*size) << 3; vp9_restore_coding_context(cpi); @@ -2953,15 +2967,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Clear down mmx registers to allow floating point in what follows. vp9_clear_system_state(); - // For an alt ref frame in 2 pass we skip the call to the second - // pass function that sets the target bandwidth so we must set it here. - if (cpi->refresh_alt_ref_frame) { - // Set a per frame bit target for the alt ref frame. - cpi->rc.per_frame_bandwidth = cpi->twopass.gf_bits; - // Set a per second target bitrate. - cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * cpi->output_framerate); - } - // Clear zbin over-quant value and mode boost values. cpi->zbin_mode_boost = 0; @@ -3088,11 +3093,22 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); - // Decide q and q bounds + // Decide q and q bounds. q = vp9_rc_pick_q_and_adjust_q_bounds(cpi, &bottom_index, &top_index); + // JBB : This is realtime mode. In real time mode the first frame + // should be larger. Q of 0 is disabled because we force tx size to be + // 16x16... + if (cpi->compressor_speed == 3) { + if (cpi->common.current_video_frame == 0) + q /= 3; + + if (q == 0) + q++; + } + if (!frame_is_intra_only(cm)) { cm->interp_filter = DEFAULT_INTERP_FILTER; /* TODO: Decide this more intelligently */ @@ -3293,7 +3309,6 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, vp9_get_second_pass_params(cpi); encode_frame_to_data_rate(cpi, size, dest, frame_flags); - // vp9_print_modes_and_motion_vectors(&cpi->common, "encode.stt"); vp9_twopass_postencode_update(cpi, *size); } @@ -3402,6 +3417,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, int64_t *time_stamp, int64_t *time_end, int flush) { VP9_COMP *cpi = (VP9_COMP *) ptr; VP9_COMMON *cm = &cpi->common; + MACROBLOCKD *xd = &cpi->mb.e_mbd; struct vpx_usec_timer cmptimer; YV12_BUFFER_CONFIG *force_src_buffer = NULL; MV_REFERENCE_FRAME ref_frame; @@ -3449,8 +3465,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, if (cpi->oxcf.arnr_max_frames > 0) { // Produce the filtered ARF frame. // TODO(agrange) merge these two functions. - configure_arnr_filter(cpi, cm->current_video_frame + frames_to_arf, - cpi->rc.gfu_boost); + vp9_configure_arnr_filter(cpi, frames_to_arf, cpi->rc.gfu_boost); vp9_temporal_filter_prepare(cpi, frames_to_arf); vp9_extend_frame_borders(&cpi->alt_ref_buffer, cm->subsampling_x, cm->subsampling_y); @@ -3466,7 +3481,9 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, #if CONFIG_MULTIPLE_ARF if (!cpi->multi_arf_enabled) #endif - cpi->rc.source_alt_ref_pending = 0; // Clear Pending altf Ref flag. + cpi->rc.source_alt_ref_pending = 0; + } else { + cpi->rc.source_alt_ref_pending = 0; } } @@ -3585,11 +3602,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, vp9_extend_frame_borders(buf, cm->subsampling_x, cm->subsampling_y); } - vp9_setup_interp_filters(&cpi->mb.e_mbd, DEFAULT_INTERP_FILTER, cm); + set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); + xd->interp_kernel = vp9_get_interp_kernel( + DEFAULT_INTERP_FILTER == SWITCHABLE ? EIGHTTAP : DEFAULT_INTERP_FILTER); - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - vp9_vaq_init(); - } + if (cpi->oxcf.aq_mode == VARIANCE_AQ) + vp9_vaq_init(); if (cpi->use_svc) { SvcEncode(cpi, size, dest, frame_flags); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 9cf3f62d6..d2f42dd3e 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -8,25 +8,28 @@ * be found in the AUTHORS file in the root of the source tree. */ - #ifndef VP9_ENCODER_VP9_ONYX_INT_H_ #define VP9_ENCODER_VP9_ONYX_INT_H_ #include <stdio.h> + #include "./vpx_config.h" +#include "vpx_ports/mem.h" +#include "vpx/internal/vpx_codec_internal.h" + +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_onyx.h" -#include "vp9/encoder/vp9_treewriter.h" -#include "vp9/encoder/vp9_tokenize.h" #include "vp9/common/vp9_onyxc_int.h" -#include "vp9/encoder/vp9_variance.h" + #include "vp9/encoder/vp9_encodemb.h" -#include "vp9/encoder/vp9_quantize.h" -#include "vp9/common/vp9_entropy.h" -#include "vp9/common/vp9_entropymode.h" -#include "vpx_ports/mem.h" -#include "vpx/internal/vpx_codec_internal.h" -#include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_lookahead.h" +#include "vp9/encoder/vp9_mcomp.h" +#include "vp9/encoder/vp9_quantize.h" +#include "vp9/encoder/vp9_ratectrl.h" +#include "vp9/encoder/vp9_tokenize.h" +#include "vp9/encoder/vp9_treewriter.h" +#include "vp9/encoder/vp9_variance.h" #ifdef __cplusplus extern "C" { @@ -98,18 +101,6 @@ typedef struct { } FIRSTPASS_STATS; typedef struct { - int frames_so_far; - double frame_intra_error; - double frame_coded_error; - double frame_pcnt_inter; - double frame_pcnt_motion; - double frame_mvr; - double frame_mvr_abs; - double frame_mvc; - double frame_mvc_abs; -} ONEPASS_FRAMESTATS; - -typedef struct { struct { int err; union { @@ -190,6 +181,12 @@ typedef enum { } TX_SIZE_SEARCH_METHOD; typedef enum { + NOT_IN_USE = 0, + RELAXED_NEIGHBORING_MIN_MAX = 1, + STRICT_NEIGHBORING_MIN_MAX = 2 +} AUTO_MIN_MAX_MODE; + +typedef enum { // Values should be powers of 2 so that they can be selected as bits of // an integer flags field @@ -256,6 +253,9 @@ typedef struct { // Maximum number of steps in logarithmic subpel search before giving up. int subpel_iters_per_step; + // Control when to stop subpel search + int subpel_force_stop; + // Thresh_mult is used to set a threshold for the rd score. A higher value // means that we will accept the best mode so far more often. This number // is used in combination with the current block size, and thresh_freq_fact @@ -343,9 +343,8 @@ typedef struct { BLOCK_SIZE always_this_block_size; // Sets min and max partition sizes for this 64x64 region based on the - // same superblock in last encoded frame, and the left and above neighbor - // in this block. - int auto_min_max_partition_size; + // same 64x64 in last encoded frame, and the left and above neighbor. + AUTO_MIN_MAX_MODE auto_min_max_partition_size; // Min and max partition size we enable (block_size) as per auto // min max, but also used by adjust partitioning, and pick_partitioning. @@ -419,67 +418,6 @@ typedef struct { int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced } SPEED_FEATURES; -typedef struct { - // Rate targetting variables - int this_frame_target; - int projected_frame_size; - int sb64_target_rate; - int last_q[3]; // Separate values for Intra/Inter/ARF-GF - int last_boosted_qindex; // Last boosted GF/KF/ARF q - - int gfu_boost; - int last_boost; - int kf_boost; - - double rate_correction_factor; - double key_frame_rate_correction_factor; - double gf_rate_correction_factor; - - unsigned int frames_since_golden; - unsigned int frames_till_gf_update_due; // Count down till next GF - unsigned int max_gf_interval; - unsigned int baseline_gf_interval; - unsigned int frames_to_key; - unsigned int frames_since_key; - unsigned int this_key_frame_forced; - unsigned int next_key_frame_forced; - unsigned int source_alt_ref_pending; - unsigned int source_alt_ref_active; - unsigned int is_src_frame_alt_ref; - - int per_frame_bandwidth; // Current section per frame bandwidth target - int av_per_frame_bandwidth; // Average frame size target for clip - int min_frame_bandwidth; // Minimum allocation used for any frame - int max_frame_bandwidth; // Maximum burst rate allowed for a frame. - - int ni_av_qi; - int ni_tot_qi; - int ni_frames; - int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF - double tot_q; - double avg_q; - - int buffer_level; - int bits_off_target; - - int decimation_factor; - int decimation_count; - - int rolling_target_bits; - int rolling_actual_bits; - - int long_rolling_target_bits; - int long_rolling_actual_bits; - - int64_t total_actual_bits; - int total_target_vs_actual; // debug stats - - int worst_quality; - int active_worst_quality; - int best_quality; - // int active_best_quality; -} RATE_CONTROL; - typedef struct VP9_COMP { DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); @@ -606,11 +544,6 @@ typedef struct VP9_COMP { int64_t target_bandwidth; struct vpx_codec_pkt_list *output_pkt_list; -#if 0 - // Experimental code for lagged and one pass - ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS]; - int one_pass_frame_index; -#endif MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; int mbgraph_n_frames; // number of frames filled in the above int static_mb_pct; // % forced skip mbs by segmentation @@ -821,6 +754,14 @@ static int get_token_alloc(int mb_rows, int mb_cols) { return mb_rows * mb_cols * (48 * 16 + 4); } +static void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, + MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { + xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME + : 0]; + xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME + : 0]; +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 210d15f0d..2b9e31f08 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -80,7 +80,7 @@ static int full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, step_param = 6; further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) { + for (i = LAST_FRAME; i <= LAST_FRAME && cpi->common.show_frame; ++i) { if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { tmp_mv->as_int = INVALID_MV; @@ -142,8 +142,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int *returnrate, int64_t *returndistortion, - BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx) { + BLOCK_SIZE bsize) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi; const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]); @@ -155,6 +154,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, VP9_ALT_FLAG }; int64_t best_rd = INT64_MAX; int64_t this_rd; + int64_t cost[4]= { 0, 100, 150, 205 }; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; @@ -171,7 +171,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = MIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { vp9_setup_buffer_inter(cpi, x, tile, @@ -182,7 +182,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, frame_mv[ZEROMV][ref_frame].as_int = 0; } - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { int rate_mv = 0; if (!(cpi->ref_frame_flags & flag_list[ref_frame])) @@ -191,29 +191,42 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Select prediction reference frames. xd->plane[0].pre[0] = yv12_mb[ref_frame][0]; - - x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] = - full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], &rate_mv); - - if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) - continue; - clamp_mv2(&frame_mv[NEARESTMV][ref_frame].as_mv, xd); clamp_mv2(&frame_mv[NEARMV][ref_frame].as_mv, xd); for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { - int rate = x->inter_mode_cost[mbmi->mode_context[ref_frame]] - [INTER_OFFSET(this_mode)]; - int64_t dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] * - x->mode_sad[ref_frame][INTER_OFFSET(this_mode)]; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); + int rate = cost[this_mode - NEARESTMV]; + int64_t dist; + + if (this_mode == NEWMV) { + if (this_rd < 300) + continue; + + x->mode_sad[ref_frame][INTER_OFFSET(NEWMV)] = + full_pixel_motion_search(cpi, x, tile, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], &rate_mv); + + if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) + continue; + } + + dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)]; + this_rd = rate + dist; if (this_rd < best_rd) { best_rd = this_rd; mbmi->mode = this_mode; mbmi->ref_frame[0] = ref_frame; mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int; + mbmi->interp_filter = EIGHTTAP; + + mbmi->ref_frame[1] = INTRA_FRAME; + mbmi->tx_size = max_txsize_lookup[bsize]; + mbmi->uv_mode = this_mode; + mbmi->skip_coeff = 0; + mbmi->sb_type = bsize; + mbmi->segment_id = 0; } } } @@ -223,8 +236,5 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // TODO(jingning) intra prediction search, if the best SAD is above a certain // threshold. - // store mode decisions - ctx->mic = *xd->mi_8x8[0]; - return INT64_MAX; } diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h index 82904ae8a..05ff18762 100644 --- a/vp9/encoder/vp9_pickmode.h +++ b/vp9/encoder/vp9_pickmode.h @@ -22,8 +22,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int *returnrate, int64_t *returndistortion, - BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx); + BLOCK_SIZE bsize); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 3c816a3d0..74eb98fb0 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -218,7 +218,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) { vp9_clear_system_state(); // __asm emms; // For 1-pass. - if (cpi->pass == 0) { + if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { if (cpi->common.current_video_frame == 0) { target = oxcf->starting_buffer_level / 2; } else { @@ -246,7 +246,7 @@ static void calc_iframe_target_size(VP9_COMP *cpi) { if (oxcf->rc_max_intra_bitrate_pct) { const int max_rate = rc->per_frame_bandwidth * - oxcf->rc_max_intra_bitrate_pct / 100; + oxcf->rc_max_intra_bitrate_pct / 100; target = MIN(target, max_rate); } rc->this_frame_target = target; @@ -375,27 +375,22 @@ static int target_size_from_buffer_level(const VP9_CONFIG *oxcf, static void calc_pframe_target_size(VP9_COMP *const cpi) { RATE_CONTROL *const rc = &cpi->rc; const VP9_CONFIG *const oxcf = &cpi->oxcf; - int min_frame_target = MAX(rc->min_frame_bandwidth, - rc->av_per_frame_bandwidth >> 5); - if (cpi->refresh_alt_ref_frame) { - // Special alt reference frame case - // Per frame bit target for the alt ref frame - rc->per_frame_bandwidth = cpi->twopass.gf_bits; - rc->this_frame_target = rc->per_frame_bandwidth; - } else { - // Normal frames (gf and inter). - rc->this_frame_target = rc->per_frame_bandwidth; - // Set target frame size based on buffer level, for 1 pass CBR. - if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { - // Need to decide how low min_frame_target should be for 1-pass CBR. - // For now, use: cpi->rc.av_per_frame_bandwidth / 16: - min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, - FRAME_OVERHEAD_BITS); - rc->this_frame_target = target_size_from_buffer_level(oxcf, rc); - // Adjust qp-max based on buffer level. - rc->active_worst_quality = - adjust_active_worst_quality_from_buffer_level(oxcf, rc); - } + int min_frame_target; + rc->this_frame_target = rc->per_frame_bandwidth; + + if (cpi->pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + // Need to decide how low min_frame_target should be for 1-pass CBR. + // For now, use: cpi->rc.av_per_frame_bandwidth / 16: + min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, + FRAME_OVERHEAD_BITS); + rc->this_frame_target = target_size_from_buffer_level(oxcf, rc); + // Adjust qp-max based on buffer level. + rc->active_worst_quality = + adjust_active_worst_quality_from_buffer_level(oxcf, rc); + + if (rc->this_frame_target < min_frame_target) + rc->this_frame_target = min_frame_target; + return; } // Check that the total sum of adjustments is not above the maximum allowed. @@ -404,6 +399,9 @@ static void calc_pframe_target_size(VP9_COMP *const cpi) { // not capable of recovering all the extra bits we have spent in the KF or GF, // then the remainder will have to be recovered over a longer time span via // other buffer / rate control mechanisms. + min_frame_target = MAX(rc->min_frame_bandwidth, + rc->av_per_frame_bandwidth >> 5); + if (rc->this_frame_target < min_frame_target) rc->this_frame_target = min_frame_target; diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 65ddead11..eba4b7a92 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -12,66 +12,127 @@ #ifndef VP9_ENCODER_VP9_RATECTRL_H_ #define VP9_ENCODER_VP9_RATECTRL_H_ -#include "vp9/encoder/vp9_onyx_int.h" - #ifdef __cplusplus extern "C" { #endif #define FRAME_OVERHEAD_BITS 200 -void vp9_save_coding_context(VP9_COMP *cpi); -void vp9_restore_coding_context(VP9_COMP *cpi); - -void vp9_setup_key_frame(VP9_COMP *cpi); -void vp9_setup_inter_frame(VP9_COMP *cpi); +typedef struct { + // Rate targetting variables + int this_frame_target; + int projected_frame_size; + int sb64_target_rate; + int last_q[3]; // Separate values for Intra/Inter/ARF-GF + int last_boosted_qindex; // Last boosted GF/KF/ARF q + + int gfu_boost; + int last_boost; + int kf_boost; + + double rate_correction_factor; + double key_frame_rate_correction_factor; + double gf_rate_correction_factor; + + unsigned int frames_since_golden; + unsigned int frames_till_gf_update_due; // Count down till next GF + unsigned int max_gf_interval; + unsigned int baseline_gf_interval; + unsigned int frames_to_key; + unsigned int frames_since_key; + unsigned int this_key_frame_forced; + unsigned int next_key_frame_forced; + unsigned int source_alt_ref_pending; + unsigned int source_alt_ref_active; + unsigned int is_src_frame_alt_ref; + + int per_frame_bandwidth; // Current section per frame bandwidth target + int av_per_frame_bandwidth; // Average frame size target for clip + int min_frame_bandwidth; // Minimum allocation used for any frame + int max_frame_bandwidth; // Maximum burst rate allowed for a frame. + + int ni_av_qi; + int ni_tot_qi; + int ni_frames; + int avg_frame_qindex[3]; // 0 - KEY, 1 - INTER, 2 - ARF/GF + double tot_q; + double avg_q; + + int buffer_level; + int bits_off_target; + + int decimation_factor; + int decimation_count; + + int rolling_target_bits; + int rolling_actual_bits; + + int long_rolling_target_bits; + int long_rolling_actual_bits; + + int64_t total_actual_bits; + int total_target_vs_actual; // debug stats + + int worst_quality; + int active_worst_quality; + int best_quality; + // int active_best_quality; +} RATE_CONTROL; + +struct VP9_COMP; + +void vp9_save_coding_context(struct VP9_COMP *cpi); +void vp9_restore_coding_context(struct VP9_COMP *cpi); + +void vp9_setup_key_frame(struct VP9_COMP *cpi); +void vp9_setup_inter_frame(struct VP9_COMP *cpi); double vp9_convert_qindex_to_q(int qindex); // Updates rate correction factors -void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var); +void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi, int damp_var); // initialize luts for minq void vp9_rc_init_minq_luts(void); // return of 0 means drop frame // Changes only rc.this_frame_target and rc.sb64_rate_target -int vp9_rc_pick_frame_size_target(VP9_COMP *cpi); +int vp9_rc_pick_frame_size_target(struct VP9_COMP *cpi); -void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, +void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi, int this_frame_target, int *frame_under_shoot_limit, int *frame_over_shoot_limit); // Picks q and q bounds given the target for bits -int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi, +int vp9_rc_pick_q_and_adjust_q_bounds(const struct VP9_COMP *cpi, int *bottom_index, int *top_index); // Estimates q to achieve a target bits per frame -int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, +int vp9_rc_regulate_q(const struct VP9_COMP *cpi, int target_bits_per_frame, int active_best_quality, int active_worst_quality); // Post encode update of the rate control parameters based // on bytes used -void vp9_rc_postencode_update(VP9_COMP *cpi, +void vp9_rc_postencode_update(struct VP9_COMP *cpi, uint64_t bytes_used); // for dropped frames -void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi); +void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi); // estimates bits per mb for a given qindex and correction factor int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, double correction_factor); // Post encode update of the rate control parameters for 2-pass -void vp9_twopass_postencode_update(VP9_COMP *cpi, +void vp9_twopass_postencode_update(struct VP9_COMP *cpi, uint64_t bytes_used); // Decide if we should drop this frame: For 1-pass CBR. -int vp9_drop_frame(VP9_COMP *cpi); +int vp9_drop_frame(struct VP9_COMP *cpi); // Update the buffer level. -void vp9_update_buffer_level(VP9_COMP *cpi, int encoded_frame_size); +void vp9_update_buffer_level(struct VP9_COMP *cpi, int encoded_frame_size); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 3be79f46b..f375a88ff 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -280,22 +280,24 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { fill_token_costs(x->token_costs, cm->fc.coef_probs); - for (i = 0; i < PARTITION_CONTEXTS; i++) - vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i), - vp9_partition_tree); - - fill_mode_costs(cpi); - - if (!frame_is_intra_only(cm)) { - vp9_build_nmv_cost_table(x->nmvjointcost, - cm->allow_high_precision_mv ? x->nmvcost_hp - : x->nmvcost, - &cm->fc.nmvc, - cm->allow_high_precision_mv, 1, 1); - - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - vp9_cost_tokens((int *)x->inter_mode_cost[i], - cm->fc.inter_mode_probs[i], vp9_inter_mode_tree); + if (cpi->compressor_speed != 3) { + for (i = 0; i < PARTITION_CONTEXTS; i++) + vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i), + vp9_partition_tree); + + fill_mode_costs(cpi); + + if (!frame_is_intra_only(cm)) { + vp9_build_nmv_cost_table(x->nmvjointcost, + cm->allow_high_precision_mv ? x->nmvcost_hp + : x->nmvcost, + &cm->fc.nmvc, + cm->allow_high_precision_mv, 1, 1); + + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + vp9_cost_tokens((int *)x->inter_mode_cost[i], + cm->fc.inter_mode_probs[i], vp9_inter_mode_tree); + } } } @@ -419,12 +421,22 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); + (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); + if (i == 0) x->pred_sse[ref] = sse; - - dist_sum += (int)sse; + if (cpi->compressor_speed > 2) { + dist_sum += (int)sse; + } else { + int rate; + int64_t dist; + model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], + pd->dequant[1] >> 3, &rate, &dist); + rate_sum += rate; + dist_sum += (int)dist; + } } *out_rate_sum = rate_sum; @@ -1517,8 +1529,8 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi, vp9_build_inter_predictor(pre, pd->pre[ref].stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, - &xd->block_refs[ref]->sf, - width, height, ref, &xd->subpix, MV_PRECISION_Q3, + &xd->block_refs[ref]->sf, width, height, ref, + xd->interp_kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); } @@ -1840,7 +1852,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, &bsi->ref_mv->as_mv, cm->allow_high_precision_mv, x->errorperbit, v_fn_ptr, - 0, cpi->sf.subpel_iters_per_step, + cpi->sf.subpel_force_stop, + cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &distortion, &x->pred_sse[mbmi->ref_frame[0]]); @@ -2451,7 +2464,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - 0, cpi->sf.subpel_iters_per_step, + cpi->sf.subpel_force_stop, + cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref]); } @@ -2466,6 +2480,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } + return; } static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, @@ -2536,7 +2551,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &frame_mv[refs[!id]].as_mv, &xd->block_refs[!id]->sf, pw, ph, 0, - &xd->subpix, MV_PRECISION_Q3, + xd->interp_kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); // Compound motion search on first ref frame. @@ -2783,7 +2798,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int j; int64_t rs_rd; mbmi->interp_filter = i; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); rs = get_switchable_rate(x); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); @@ -2854,7 +2869,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : *best_filter; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0; if (pred_exists) { @@ -3277,13 +3292,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; } - set_ref_ptrs(cm, xd, ref_frame - 1, second_ref_frame - 1); + set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); mbmi->uv_mode = DC_PRED; // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->interp_filter; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (comp_pred) { if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) @@ -3709,7 +3725,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_tx_diff); } - set_ref_ptrs(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1); + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, &mbmi->ref_mvs[mbmi->ref_frame[0]][0], &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : @@ -3902,13 +3918,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) continue; - set_ref_ptrs(cm, xd, ref_frame - 1, second_ref_frame - 1); + set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); mbmi->uv_mode = DC_PRED; // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. - mbmi->interp_filter = cm->interp_filter; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP + : cm->interp_filter; + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (comp_pred) { if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) @@ -4031,8 +4048,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int newbest, rs; int64_t rs_rd; mbmi->interp_filter = switchable_filter_index; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile, &mbmi->ref_mvs[ref_frame][0], second_ref, @@ -4097,7 +4113,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter : cm->interp_filter); - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter); if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level @@ -4442,7 +4458,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, vp9_zero(best_tx_diff); } - set_ref_ptrs(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1); + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); store_coding_context(x, ctx, best_mode_index, &mbmi->ref_mvs[mbmi->ref_frame[0]][0], &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 : diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index c2eea0aaa..e822e4c64 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -60,7 +60,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, 16, 16, which_mv, - &xd->subpix, MV_PRECISION_Q3, x, y); + xd->interp_kernel, MV_PRECISION_Q3, x, y); vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_size, @@ -68,7 +68,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - &xd->subpix, mv_precision_uv, x, y); + xd->interp_kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_size, @@ -76,7 +76,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, scale, uv_block_size, uv_block_size, which_mv, - &xd->subpix, mv_precision_uv, x, y); + xd->interp_kernel, mv_precision_uv, x, y); } void vp9_temporal_filter_apply_c(uint8_t *frame1, @@ -392,7 +392,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { const int num_frames_backward = distance; const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead) - (num_frames_backward + 1); - struct scale_factors sf; switch (blur_type) { @@ -408,7 +407,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { case 2: // Forward Blur - frames_to_blur_forward = num_frames_forward; if (frames_to_blur_forward >= max_frames) @@ -471,22 +469,24 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { strength, &sf); } -void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame, - const int group_boost) { +void vp9_configure_arnr_filter(VP9_COMP *cpi, + const unsigned int frames_to_arnr, + const int group_boost) { int half_gf_int; int frames_after_arf; int frames_bwd = cpi->oxcf.arnr_max_frames - 1; int frames_fwd = cpi->oxcf.arnr_max_frames - 1; int q; - // Define the arnr filter width for this group of frames: - // We only filter frames that lie within a distance of half - // the GF interval from the ARF frame. We also have to trap - // cases where the filter extends beyond the end of clip. - // Note: this_frame->frame has been updated in the loop - // so it now points at the ARF frame. + // Define the arnr filter width for this group of frames. We only + // filter frames that lie within a distance of half the GF interval + // from the ARF frame. We also have to trap cases where the filter + // extends beyond the end of the lookahead buffer. + // Note: frames_to_arnr parameter is the offset of the arnr + // frame from the current frame. half_gf_int = cpi->rc.baseline_gf_interval >> 1; - frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1); + frames_after_arf = vp9_lookahead_depth(cpi->lookahead) + - frames_to_arnr - 1; switch (cpi->oxcf.arnr_type) { case 1: // Backward filter diff --git a/vp9/encoder/vp9_temporal_filter.h b/vp9/encoder/vp9_temporal_filter.h index eea2f1018..3028d7884 100644 --- a/vp9/encoder/vp9_temporal_filter.h +++ b/vp9/encoder/vp9_temporal_filter.h @@ -16,8 +16,9 @@ extern "C" { #endif void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance); -void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame, - const int group_boost); +void vp9_configure_arnr_filter(VP9_COMP *cpi, + const unsigned int frames_to_arnr, + const int group_boost); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c index d81b72bba..ea031fb07 100644 --- a/vp9/encoder/x86/vp9_dct_avx2.c +++ b/vp9/encoder/x86/vp9_dct_avx2.c @@ -163,7 +163,7 @@ static INLINE void transpose_4x4_avx2(__m128i *res) { res[3] = _mm_unpackhi_epi64(res[2], res[2]); } -void fdct4_1d_avx2(__m128i *in) { +void fdct4_avx2(__m128i *in) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); @@ -196,7 +196,7 @@ void fdct4_1d_avx2(__m128i *in) { transpose_4x4_avx2(in); } -void fadst4_1d_avx2(__m128i *in) { +void fadst4_avx2(__m128i *in) { const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9); const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9); const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9); @@ -250,20 +250,20 @@ void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output, load_buffer_4x4_avx2(input, in, stride); switch (tx_type) { case 0: // DCT_DCT - fdct4_1d_avx2(in); - fdct4_1d_avx2(in); + fdct4_avx2(in); + fdct4_avx2(in); break; case 1: // ADST_DCT - fadst4_1d_avx2(in); - fdct4_1d_avx2(in); + fadst4_avx2(in); + fdct4_avx2(in); break; case 2: // DCT_ADST - fdct4_1d_avx2(in); - fadst4_1d_avx2(in); + fdct4_avx2(in); + fadst4_avx2(in); break; case 3: // ADST_ADST - fadst4_1d_avx2(in); - fadst4_1d_avx2(in); + fadst4_avx2(in); + fadst4_avx2(in); break; default: assert(0); @@ -658,7 +658,7 @@ static INLINE void array_transpose_8x8_avx2(__m128i *in, __m128i *res) { // 07 17 27 37 47 57 67 77 } -void fdct8_1d_avx2(__m128i *in) { +void fdct8_avx2(__m128i *in) { // constants const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); @@ -798,7 +798,7 @@ void fdct8_1d_avx2(__m128i *in) { array_transpose_8x8_avx2(in, in); } -void fadst8_1d_avx2(__m128i *in) { +void fadst8_avx2(__m128i *in) { // Constants const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); @@ -1034,20 +1034,20 @@ void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output, load_buffer_8x8_avx2(input, in, stride); switch (tx_type) { case 0: // DCT_DCT - fdct8_1d_avx2(in); - fdct8_1d_avx2(in); + fdct8_avx2(in); + fdct8_avx2(in); break; case 1: // ADST_DCT - fadst8_1d_avx2(in); - fdct8_1d_avx2(in); + fadst8_avx2(in); + fdct8_avx2(in); break; case 2: // DCT_ADST - fdct8_1d_avx2(in); - fadst8_1d_avx2(in); + fdct8_avx2(in); + fadst8_avx2(in); break; case 3: // ADST_ADST - fadst8_1d_avx2(in); - fadst8_1d_avx2(in); + fadst8_avx2(in); + fadst8_avx2(in); break; default: assert(0); @@ -1216,7 +1216,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) { step1_6 = _mm_sub_epi16(in01, in14); step1_7 = _mm_sub_epi16(in00, in15); } - // Work on the first eight values; fdct8_1d(input, even_results); + // Work on the first eight values; fdct8(input, even_results); { // Add/substract const __m128i q0 = _mm_add_epi16(input0, input7); @@ -1730,7 +1730,7 @@ static INLINE void right_shift_16x16_avx2(__m128i *res0, __m128i *res1) { right_shift_8x8_avx2(res1 + 8, 2); } -void fdct16_1d_8col_avx2(__m128i *in) { +void fdct16_8col_avx2(__m128i *in) { // perform 16x16 1-D DCT for 8 columns __m128i i[8], s[8], p[8], t[8], u[16], v[16]; const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); @@ -2052,7 +2052,7 @@ void fdct16_1d_8col_avx2(__m128i *in) { in[15] = _mm_packs_epi32(v[14], v[15]); } -void fadst16_1d_8col_avx2(__m128i *in) { +void fadst16_8col_avx2(__m128i *in) { // perform 16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); @@ -2522,15 +2522,15 @@ void fadst16_1d_8col_avx2(__m128i *in) { in[15] = _mm_sub_epi16(kZero, s[1]); } -void fdct16_1d_avx2(__m128i *in0, __m128i *in1) { - fdct16_1d_8col_avx2(in0); - fdct16_1d_8col_avx2(in1); +void fdct16_avx2(__m128i *in0, __m128i *in1) { + fdct16_8col_avx2(in0); + fdct16_8col_avx2(in1); array_transpose_16x16_avx2(in0, in1); } -void fadst16_1d_avx2(__m128i *in0, __m128i *in1) { - fadst16_1d_8col_avx2(in0); - fadst16_1d_8col_avx2(in1); +void fadst16_avx2(__m128i *in0, __m128i *in1) { + fadst16_8col_avx2(in0); + fadst16_8col_avx2(in1); array_transpose_16x16_avx2(in0, in1); } @@ -2540,24 +2540,24 @@ void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output, load_buffer_16x16_avx2(input, in0, in1, stride); switch (tx_type) { case 0: // DCT_DCT - fdct16_1d_avx2(in0, in1); + fdct16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); - fdct16_1d_avx2(in0, in1); + fdct16_avx2(in0, in1); break; case 1: // ADST_DCT - fadst16_1d_avx2(in0, in1); + fadst16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); - fdct16_1d_avx2(in0, in1); + fdct16_avx2(in0, in1); break; case 2: // DCT_ADST - fdct16_1d_avx2(in0, in1); + fdct16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); - fadst16_1d_avx2(in0, in1); + fadst16_avx2(in0, in1); break; case 3: // ADST_ADST - fadst16_1d_avx2(in0, in1); + fadst16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); - fadst16_1d_avx2(in0, in1); + fadst16_avx2(in0, in1); break; default: assert(0); diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index 65431bdbf..c876cc273 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -161,7 +161,7 @@ static INLINE void transpose_4x4(__m128i *res) { res[3] = _mm_unpackhi_epi64(res[2], res[2]); } -void fdct4_1d_sse2(__m128i *in) { +void fdct4_sse2(__m128i *in) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); @@ -194,7 +194,7 @@ void fdct4_1d_sse2(__m128i *in) { transpose_4x4(in); } -void fadst4_1d_sse2(__m128i *in) { +void fadst4_sse2(__m128i *in) { const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9); const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9); const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9); @@ -248,20 +248,20 @@ void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output, load_buffer_4x4(input, in, stride); switch (tx_type) { case 0: // DCT_DCT - fdct4_1d_sse2(in); - fdct4_1d_sse2(in); + fdct4_sse2(in); + fdct4_sse2(in); break; case 1: // ADST_DCT - fadst4_1d_sse2(in); - fdct4_1d_sse2(in); + fadst4_sse2(in); + fdct4_sse2(in); break; case 2: // DCT_ADST - fdct4_1d_sse2(in); - fadst4_1d_sse2(in); + fdct4_sse2(in); + fadst4_sse2(in); break; case 3: // ADST_ADST - fadst4_1d_sse2(in); - fadst4_1d_sse2(in); + fadst4_sse2(in); + fadst4_sse2(in); break; default: assert(0); @@ -656,7 +656,7 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) { // 07 17 27 37 47 57 67 77 } -void fdct8_1d_sse2(__m128i *in) { +void fdct8_sse2(__m128i *in) { // constants const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); @@ -796,7 +796,7 @@ void fdct8_1d_sse2(__m128i *in) { array_transpose_8x8(in, in); } -void fadst8_1d_sse2(__m128i *in) { +void fadst8_sse2(__m128i *in) { // Constants const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); @@ -1032,20 +1032,20 @@ void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output, load_buffer_8x8(input, in, stride); switch (tx_type) { case 0: // DCT_DCT - fdct8_1d_sse2(in); - fdct8_1d_sse2(in); + fdct8_sse2(in); + fdct8_sse2(in); break; case 1: // ADST_DCT - fadst8_1d_sse2(in); - fdct8_1d_sse2(in); + fadst8_sse2(in); + fdct8_sse2(in); break; case 2: // DCT_ADST - fdct8_1d_sse2(in); - fadst8_1d_sse2(in); + fdct8_sse2(in); + fadst8_sse2(in); break; case 3: // ADST_ADST - fadst8_1d_sse2(in); - fadst8_1d_sse2(in); + fadst8_sse2(in); + fadst8_sse2(in); break; default: assert(0); @@ -1214,7 +1214,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { step1_6 = _mm_sub_epi16(in01, in14); step1_7 = _mm_sub_epi16(in00, in15); } - // Work on the first eight values; fdct8_1d(input, even_results); + // Work on the first eight values; fdct8(input, even_results); { // Add/substract const __m128i q0 = _mm_add_epi16(input0, input7); @@ -1728,7 +1728,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) { right_shift_8x8(res1 + 8, 2); } -void fdct16_1d_8col(__m128i *in) { +void fdct16_8col(__m128i *in) { // perform 16x16 1-D DCT for 8 columns __m128i i[8], s[8], p[8], t[8], u[16], v[16]; const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); @@ -2050,7 +2050,7 @@ void fdct16_1d_8col(__m128i *in) { in[15] = _mm_packs_epi32(v[14], v[15]); } -void fadst16_1d_8col(__m128i *in) { +void fadst16_8col(__m128i *in) { // perform 16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); @@ -2520,15 +2520,15 @@ void fadst16_1d_8col(__m128i *in) { in[15] = _mm_sub_epi16(kZero, s[1]); } -void fdct16_1d_sse2(__m128i *in0, __m128i *in1) { - fdct16_1d_8col(in0); - fdct16_1d_8col(in1); +void fdct16_sse2(__m128i *in0, __m128i *in1) { + fdct16_8col(in0); + fdct16_8col(in1); array_transpose_16x16(in0, in1); } -void fadst16_1d_sse2(__m128i *in0, __m128i *in1) { - fadst16_1d_8col(in0); - fadst16_1d_8col(in1); +void fadst16_sse2(__m128i *in0, __m128i *in1) { + fadst16_8col(in0); + fadst16_8col(in1); array_transpose_16x16(in0, in1); } @@ -2538,24 +2538,24 @@ void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output, load_buffer_16x16(input, in0, in1, stride); switch (tx_type) { case 0: // DCT_DCT - fdct16_1d_sse2(in0, in1); + fdct16_sse2(in0, in1); right_shift_16x16(in0, in1); - fdct16_1d_sse2(in0, in1); + fdct16_sse2(in0, in1); break; case 1: // ADST_DCT - fadst16_1d_sse2(in0, in1); + fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); - fdct16_1d_sse2(in0, in1); + fdct16_sse2(in0, in1); break; case 2: // DCT_ADST - fdct16_1d_sse2(in0, in1); + fdct16_sse2(in0, in1); right_shift_16x16(in0, in1); - fadst16_1d_sse2(in0, in1); + fadst16_sse2(in0, in1); break; case 3: // ADST_ADST - fadst16_1d_sse2(in0, in1); + fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); - fadst16_1d_sse2(in0, in1); + fadst16_sse2(in0, in1); break; default: assert(0); |