diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_dct.c | 179 | ||||
-rw-r--r-- | vp9/encoder/vp9_dct.h | 32 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 249 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.h | 82 | ||||
-rw-r--r-- | vp9/encoder/vp9_lookahead.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 26 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 381 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 112 | ||||
-rw-r--r-- | vp9/encoder/vp9_quantize.c | 102 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 283 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 40 | ||||
-rw-r--r-- | vp9/encoder/vp9_sad.c (renamed from vp9/encoder/vp9_sad_c.c) | 0 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_avx2.c | 77 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_sse2.c | 83 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_quantize_ssse3.asm | 5 |
17 files changed, 894 insertions, 775 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index a840b480a..a9d168cc8 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -18,8 +18,6 @@ #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_systemdependent.h" -#include "vp9/encoder/vp9_dct.h" - static INLINE int fdct_round_shift(int input) { int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); assert(INT16_MIN <= rv && rv <= INT16_MAX); @@ -157,32 +155,36 @@ static const transform_2d FHT_4[] = { { fadst4, fadst4 } // ADST_ADST = 3 }; -void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, - int stride, int tx_type) { - int16_t out[4 * 4]; - int16_t *outptr = &out[0]; - int i, j; - int16_t temp_in[4], temp_out[4]; - const transform_2d ht = FHT_4[tx_type]; +void vp9_fht4x4_c(const int16_t *input, int16_t *output, + int stride, int tx_type) { + if (tx_type == DCT_DCT) { + vp9_fdct4x4_c(input, output, stride); + } else { + int16_t out[4 * 4]; + int16_t *outptr = &out[0]; + int i, j; + int16_t temp_in[4], temp_out[4]; + const transform_2d ht = FHT_4[tx_type]; - // Columns - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = input[j * stride + i] * 16; - if (i == 0 && temp_in[0]) - temp_in[0] += 1; - ht.cols(temp_in, temp_out); - for (j = 0; j < 4; ++j) - outptr[j * 4 + i] = temp_out[j]; - } + // Columns + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = input[j * stride + i] * 16; + if (i == 0 && temp_in[0]) + temp_in[0] += 1; + ht.cols(temp_in, temp_out); + for (j = 0; j < 4; ++j) + outptr[j * 4 + i] = temp_out[j]; + } - // Rows - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - temp_in[j] = out[j + i * 4]; - ht.rows(temp_in, temp_out); - for (j = 0; j < 4; ++j) - output[j + i * 4] = (temp_out[j] + 1) >> 2; + // Rows + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j + i * 4]; + ht.rows(temp_in, temp_out); + for (j = 0; j < 4; ++j) + output[j + i * 4] = (temp_out[j] + 1) >> 2; + } } } @@ -565,30 +567,34 @@ static const transform_2d FHT_8[] = { { fadst8, fadst8 } // ADST_ADST = 3 }; -void vp9_short_fht8x8_c(const int16_t *input, int16_t *output, - int stride, int tx_type) { - int16_t out[64]; - int16_t *outptr = &out[0]; - int i, j; - int16_t temp_in[8], temp_out[8]; - const transform_2d ht = FHT_8[tx_type]; - - // Columns - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = input[j * stride + i] * 4; - ht.cols(temp_in, temp_out); - for (j = 0; j < 8; ++j) - outptr[j * 8 + i] = temp_out[j]; - } +void vp9_fht8x8_c(const int16_t *input, int16_t *output, + int stride, int tx_type) { + if (tx_type == DCT_DCT) { + vp9_fdct8x8_c(input, output, stride); + } else { + int16_t out[64]; + int16_t *outptr = &out[0]; + int i, j; + int16_t temp_in[8], temp_out[8]; + const transform_2d ht = FHT_8[tx_type]; + + // Columns + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = input[j * stride + i] * 4; + ht.cols(temp_in, temp_out); + for (j = 0; j < 8; ++j) + outptr[j * 8 + i] = temp_out[j]; + } - // Rows - for (i = 0; i < 8; ++i) { - for (j = 0; j < 8; ++j) - temp_in[j] = out[j + i * 8]; - ht.rows(temp_in, temp_out); - for (j = 0; j < 8; ++j) - output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; + // Rows + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j + i * 8]; + ht.rows(temp_in, temp_out); + for (j = 0; j < 8; ++j) + output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; + } } } @@ -958,31 +964,34 @@ static const transform_2d FHT_16[] = { { fadst16, fadst16 } // ADST_ADST = 3 }; -void vp9_short_fht16x16_c(const int16_t *input, int16_t *output, - int stride, int tx_type) { - int16_t out[256]; - int16_t *outptr = &out[0]; - int i, j; - int16_t temp_in[16], temp_out[16]; - const transform_2d ht = FHT_16[tx_type]; - - // Columns - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = input[j * stride + i] * 4; - ht.cols(temp_in, temp_out); - for (j = 0; j < 16; ++j) - outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; -// outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; - } +void vp9_fht16x16_c(const int16_t *input, int16_t *output, + int stride, int tx_type) { + if (tx_type == DCT_DCT) { + vp9_fdct16x16_c(input, output, stride); + } else { + int16_t out[256]; + int16_t *outptr = &out[0]; + int i, j; + int16_t temp_in[16], temp_out[16]; + const transform_2d ht = FHT_16[tx_type]; + + // Columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = input[j * stride + i] * 4; + ht.cols(temp_in, temp_out); + for (j = 0; j < 16; ++j) + outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; + } - // Rows - for (i = 0; i < 16; ++i) { - for (j = 0; j < 16; ++j) - temp_in[j] = out[j + i * 16]; - ht.rows(temp_in, temp_out); - for (j = 0; j < 16; ++j) - output[j + i * 16] = temp_out[j]; + // Rows + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j + i * 16]; + ht.rows(temp_in, temp_out); + for (j = 0; j < 16; ++j) + output[j + i * 16] = temp_out[j]; + } } } @@ -1375,27 +1384,3 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) { out[j + i * 32] = temp_out[j]; } } - -void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride) { - if (tx_type == DCT_DCT) - vp9_fdct4x4(input, output, stride); - else - vp9_short_fht4x4(input, output, stride, tx_type); -} - -void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride) { - if (tx_type == DCT_DCT) - vp9_fdct8x8(input, output, stride); - else - vp9_short_fht8x8(input, output, stride, tx_type); -} - -void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride) { - if (tx_type == DCT_DCT) - vp9_fdct16x16(input, output, stride); - else - vp9_short_fht16x16(input, output, stride, tx_type); -} diff --git a/vp9/encoder/vp9_dct.h b/vp9/encoder/vp9_dct.h deleted file mode 100644 index cf5f001a9..000000000 --- a/vp9/encoder/vp9_dct.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2013 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_ENCODER_VP9_DCT_H_ -#define VP9_ENCODER_VP9_DCT_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride); - -void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride); - -void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, - int stride); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // VP9_ENCODER_VP9_DCT_H_ diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index eaedf1e07..442170f0c 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -319,7 +319,7 @@ static void build_activity_map(VP9_COMP *cpi) { } // Macroblock activity masking -void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { +static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) { #if USE_ACT_INDEX x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2); x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); @@ -491,24 +491,26 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } else { // Note how often each mode chosen as best cpi->mode_chosen_counts[mb_mode_index]++; - if (is_inter_block(mbmi) && - (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) { - int_mv best_mv[2]; - for (i = 0; i < 1 + has_second_ref(mbmi); ++i) - best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int; - vp9_update_mv_count(cpi, x, best_mv); - } - if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) { - const int ctx = vp9_get_pred_context_switchable_interp(xd); - ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; + if (is_inter_block(mbmi)) { + if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) { + int_mv best_mv[2]; + for (i = 0; i < 1 + has_second_ref(mbmi); ++i) + best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int; + vp9_update_mv_count(cpi, x, best_mv); + } + + if (cm->interp_filter == SWITCHABLE) { + const int ctx = vp9_get_pred_context_switchable_interp(xd); + ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; + } } cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) cpi->rd_filter_diff[i] += ctx->best_filter_diff[i]; } } @@ -671,7 +673,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, } if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); + activity_masking(cpi, x); if (cpi->oxcf.aq_mode == VARIANCE_AQ) { vp9_clear_system_state(); // __asm emms; @@ -711,36 +713,40 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, static void update_stats(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *mi = xd->mi_8x8[0]; - MB_MODE_INFO *const mbmi = &mi->mbmi; + const MACROBLOCK *const x = &cpi->mb; + const MACROBLOCKD *const xd = &x->e_mbd; + const MODE_INFO *const mi = xd->mi_8x8[0]; + const MB_MODE_INFO *const mbmi = &mi->mbmi; if (!frame_is_intra_only(cm)) { const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); + if (!seg_ref_active) { + FRAME_COUNTS *const counts = &cm->counts; + const int inter_block = is_inter_block(mbmi); - if (!seg_ref_active) - cm->counts.intra_inter[vp9_get_intra_inter_context(xd)] - [is_inter_block(mbmi)]++; - - // If the segment reference feature is enabled we have only a single - // reference frame allowed for the segment so exclude it from - // the reference frame counts used to work out probabilities. - if (is_inter_block(mbmi) && !seg_ref_active) { - if (cm->reference_mode == REFERENCE_MODE_SELECT) - cm->counts.comp_inter[vp9_get_reference_mode_context(cm, xd)] - [has_second_ref(mbmi)]++; - - if (has_second_ref(mbmi)) { - cm->counts.comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)] - [mbmi->ref_frame[0] == GOLDEN_FRAME]++; - } else { - cm->counts.single_ref[vp9_get_pred_context_single_ref_p1(xd)][0] - [mbmi->ref_frame[0] != LAST_FRAME]++; - if (mbmi->ref_frame[0] != LAST_FRAME) - cm->counts.single_ref[vp9_get_pred_context_single_ref_p2(xd)][1] - [mbmi->ref_frame[0] != GOLDEN_FRAME]++; + counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++; + + // If the segment reference feature is enabled we have only a single + // reference frame allowed for the segment so exclude it from + // the reference frame counts used to work out probabilities. + if (inter_block) { + const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0]; + + if (cm->reference_mode == REFERENCE_MODE_SELECT) + counts->comp_inter[vp9_get_reference_mode_context(cm, xd)] + [has_second_ref(mbmi)]++; + + if (has_second_ref(mbmi)) { + counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)] + [ref0 == GOLDEN_FRAME]++; + } else { + counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0] + [ref0 != LAST_FRAME]++; + if (ref0 != LAST_FRAME) + counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1] + [ref0 != GOLDEN_FRAME]++; + } } } } @@ -1072,17 +1078,18 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } else { // Note how often each mode chosen as best cpi->mode_chosen_counts[mb_mode_index]++; - if (is_inter_block(mbmi) && - (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) { - int_mv best_mv[2]; - for (i = 0; i < 1 + has_second_ref(mbmi); ++i) - best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int; - vp9_update_mv_count(cpi, x, best_mv); - } + if (is_inter_block(mbmi)) { + if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) { + int_mv best_mv[2]; + for (i = 0; i < 1 + has_second_ref(mbmi); ++i) + best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int; + vp9_update_mv_count(cpi, x, best_mv); + } - if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) { - const int ctx = vp9_get_pred_context_switchable_interp(xd); - ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; + if (cm->interp_filter == SWITCHABLE) { + const int ctx = vp9_get_pred_context_switchable_interp(xd); + ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; + } } } } @@ -2181,108 +2188,6 @@ static void switch_tx_mode(VP9_COMP *cpi) { cpi->common.tx_mode = ALLOW_32X32; } -static void encode_frame_internal(VP9_COMP *cpi) { - int mi_row; - MACROBLOCK *const x = &cpi->mb; - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - -// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", -// cpi->common.current_video_frame, cpi->common.show_frame, -// cm->frame_type); - - vp9_zero(cm->counts.switchable_interp); - vp9_zero(cpi->tx_stepdown_count); - - xd->mi_8x8 = cm->mi_grid_visible; - // required for vp9_frame_init_quantizer - xd->mi_8x8[0] = cm->mi; - - xd->last_mi = cm->prev_mi; - - vp9_zero(cm->counts.mv); - vp9_zero(cpi->coef_counts); - vp9_zero(cm->counts.eob_branch); - - cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 - && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless); - - vp9_frame_init_quantizer(cpi); - - vp9_initialize_rd_consts(cpi); - vp9_initialize_me_consts(cpi, cm->base_qindex); - switch_tx_mode(cpi); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Initialize encode frame context. - init_encode_frame_mb_context(cpi); - - // Build a frame level activity map - build_activity_map(cpi); - } - - // Re-initialize encode frame context. - init_encode_frame_mb_context(cpi); - - vp9_zero(cpi->rd_comp_pred_diff); - vp9_zero(cpi->rd_filter_diff); - vp9_zero(cpi->rd_tx_select_diff); - vp9_zero(cpi->rd_tx_select_threshes); - - set_prev_mi(cm); - - { - struct vpx_usec_timer emr_timer; - vpx_usec_timer_start(&emr_timer); - - { - // Take tiles into account and give start/end MB - int tile_col, tile_row; - TOKENEXTRA *tp = cpi->tok; - const int tile_cols = 1 << cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - for (tile_col = 0; tile_col < tile_cols; tile_col++) { - TileInfo tile; - TOKENEXTRA *tp_old = tp; - - // For each row of SBs in the frame - vp9_tile_init(&tile, cm, tile_row, tile_col); - for (mi_row = tile.mi_row_start; - mi_row < tile.mi_row_end; mi_row += 8) - encode_sb_row(cpi, &tile, mi_row, &tp); - - cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); - assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); - } - } - } - - vpx_usec_timer_mark(&emr_timer); - cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); - } - - if (cpi->sf.skip_encode_sb) { - int j; - unsigned int intra_count = 0, inter_count = 0; - for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { - intra_count += cm->counts.intra_inter[j][0]; - inter_count += cm->counts.intra_inter[j][1]; - } - cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count); - cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME); - cpi->sf.skip_encode_frame &= cm->show_frame; - } else { - cpi->sf.skip_encode_frame = 0; - } - -#if 0 - // Keep record of the total distortion this time around for future use - cpi->last_frame_distortion = cpi->frame_distortion; -#endif -} static int check_dual_ref_flags(VP9_COMP *cpi) { const int ref_flags = cpi->ref_frame_flags; @@ -2572,28 +2477,18 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile, &dummy_rate, &dummy_dist, 1); } } +// end RTC play code - -static void encode_rtc_frame_internal(VP9_COMP *cpi) { +static void encode_frame_internal(VP9_COMP *cpi) { int mi_row; - MACROBLOCK * const x = &cpi->mb; - VP9_COMMON * const cm = &cpi->common; - MACROBLOCKD * const xd = &x->e_mbd; + MACROBLOCK *const x = &cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; // fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n", // cpi->common.current_video_frame, cpi->common.show_frame, // cm->frame_type); -// debug output -#if DBG_PRNT_SEGMAP - { - FILE *statsfile; - statsfile = fopen("segmap2.stt", "a"); - fprintf(statsfile, "\n"); - fclose(statsfile); - } -#endif - vp9_zero(cm->counts.switchable_interp); vp9_zero(cpi->tx_stepdown_count); @@ -2603,7 +2498,7 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) { xd->last_mi = cm->prev_mi; - vp9_zero(cpi->common.counts.mv); + vp9_zero(cm->counts.mv); vp9_zero(cpi->coef_counts); vp9_zero(cm->counts.eob_branch); @@ -2616,7 +2511,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) { vp9_initialize_rd_consts(cpi); vp9_initialize_me_consts(cpi, cm->base_qindex); switch_tx_mode(cpi); - cpi->sf.always_this_block_size = BLOCK_16X16; if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { // Initialize encode frame context. @@ -2655,9 +2549,12 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) { // For each row of SBs in the frame vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; - mi_row < tile.mi_row_end; mi_row += 8) - encode_rtc_sb_row(cpi, &tile, mi_row, &tp); - + mi_row < tile.mi_row_end; mi_row += 8) { + if (cpi->sf.super_fast_rtc) + encode_rtc_sb_row(cpi, &tile, mi_row, &tp); + else + encode_sb_row(cpi, &tile, mi_row, &tp); + } cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); } @@ -2687,8 +2584,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) { cpi->last_frame_distortion = cpi->frame_distortion; #endif } -// end RTC play code - void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -2763,10 +2658,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { select_tx_mode(cpi); cm->reference_mode = reference_mode; - if (cpi->sf.super_fast_rtc) - encode_rtc_frame_internal(cpi); - else - encode_frame_internal(cpi); + encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) { const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs); @@ -2846,10 +2738,7 @@ void vp9_encode_frame(VP9_COMP *cpi) { } else { // Force the usage of the BILINEAR interp_filter. cm->interp_filter = BILINEAR; - if (cpi->sf.super_fast_rtc) - encode_rtc_frame_internal(cpi); - else - encode_frame_internal(cpi); + encode_frame_internal(cpi); } } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 376a899e0..c7507c13b 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -19,7 +19,6 @@ #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_systemdependent.h" -#include "vp9/encoder/vp9_dct.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_rdopt.h" @@ -571,7 +570,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, if (!x->skip_recode) { vp9_subtract_block(16, 16, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); - vp9_fht16x16(tx_type, src_diff, coeff, diff_stride); + vp9_fht16x16(src_diff, coeff, diff_stride, tx_type); vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan_order->scan, @@ -591,7 +590,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, if (!x->skip_recode) { vp9_subtract_block(8, 8, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); - vp9_fht8x8(tx_type, src_diff, coeff, diff_stride); + vp9_fht8x8(src_diff, coeff, diff_stride, tx_type); vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, p->zbin_extra, eob, scan_order->scan, @@ -617,7 +616,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_subtract_block(4, 4, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); if (tx_type != DCT_DCT) - vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type); + vp9_fht4x4(src_diff, coeff, diff_stride, tx_type); else x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 838f74e8c..dc35044d6 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -902,8 +902,8 @@ static double calc_correction_factor(double err_per_mb, return fclamp(pow(error_term, power_term), 0.05, 5.0); } -static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, - int section_target_bandwitdh) { +int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, + int section_target_bandwitdh) { int q; const int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; @@ -2280,8 +2280,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); - const int tmp_q = estimate_max_q(cpi, &twopass->total_left_stats, - section_target_bandwidth); + const int tmp_q = vp9_twopass_worst_quality(cpi, &twopass->total_left_stats, + section_target_bandwidth); rc->active_worst_quality = tmp_q; rc->ni_av_qi = tmp_q; diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 19b59815a..7e612183e 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -10,20 +10,86 @@ #ifndef VP9_ENCODER_VP9_FIRSTPASS_H_ #define VP9_ENCODER_VP9_FIRSTPASS_H_ -#include "vp9/encoder/vp9_onyx_int.h" #ifdef __cplusplus extern "C" { #endif -void vp9_init_first_pass(VP9_COMP *cpi); -void vp9_rc_get_first_pass_params(VP9_COMP *cpi); -void vp9_first_pass(VP9_COMP *cpi); -void vp9_end_first_pass(VP9_COMP *cpi); +typedef struct { + double frame; + double intra_error; + double coded_error; + double sr_coded_error; + double ssim_weighted_pred_err; + double pcnt_inter; + double pcnt_motion; + double pcnt_second_ref; + double pcnt_neutral; + double MVr; + double mvr_abs; + double MVc; + double mvc_abs; + double MVrv; + double MVcv; + double mv_in_out_count; + double new_mv_count; + double duration; + double count; +} FIRSTPASS_STATS; -void vp9_init_second_pass(VP9_COMP *cpi); -void vp9_rc_get_second_pass_params(VP9_COMP *cpi); -void vp9_end_second_pass(VP9_COMP *cpi); +struct twopass_rc { + unsigned int section_intra_rating; + unsigned int next_iiratio; + unsigned int this_iiratio; + FIRSTPASS_STATS total_stats; + FIRSTPASS_STATS this_frame_stats; + FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start; + FIRSTPASS_STATS total_left_stats; + int first_pass_done; + int64_t bits_left; + int64_t clip_bits_total; + double avg_iiratio; + double modified_error_min; + double modified_error_max; + double modified_error_total; + double modified_error_left; + double kf_intra_err_min; + double gf_intra_err_min; + int static_scene_max_gf_interval; + int kf_bits; + // Remaining error from uncoded frames in a gf group. Two pass use only + int64_t gf_group_error_left; + + // Projected total bits available for a key frame group of frames + int64_t kf_group_bits; + + // Error score of frames still to be coded in kf group + int64_t kf_group_error_left; + + // Projected Bits available for a group of frames including 1 GF or ARF + int64_t gf_group_bits; + // Bits for the golden frame or ARF - 2 pass only + int gf_bits; + int alt_extra_bits; + + int sr_update_lag; + + int kf_zeromotion_pct; + int gf_zeromotion_pct; +}; + +struct VP9_COMP; + +void vp9_init_first_pass(struct VP9_COMP *cpi); +void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi); +void vp9_first_pass(struct VP9_COMP *cpi); +void vp9_end_first_pass(struct VP9_COMP *cpi); + +void vp9_init_second_pass(struct VP9_COMP *cpi); +void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi); +void vp9_end_second_pass(struct VP9_COMP *cpi); +int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats, + int section_target_bandwitdh); // Post encode update of the rate control parameters for 2-pass void vp9_twopass_postencode_update(struct VP9_COMP *cpi, diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c index e6e59c05a..4b642e2b6 100644 --- a/vp9/encoder/vp9_lookahead.c +++ b/vp9/encoder/vp9_lookahead.c @@ -11,9 +11,12 @@ #include <stdlib.h> #include "./vpx_config.h" + #include "vp9/common/vp9_common.h" + #include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_lookahead.h" +#include "vp9/encoder/vp9_onyx_int.h" struct lookahead_ctx { unsigned int max_sz; /* Absolute size of the queue */ diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 36591bd42..198e11cc2 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -475,11 +475,9 @@ static INLINE int check_bounds(const MACROBLOCK *x, int row, int col, ((col + range) <= x->mv_col_max); } -static INLINE int check_point(const MACROBLOCK *x, const MV *mv) { - return (mv->col < x->mv_col_min) | - (mv->col > x->mv_col_max) | - (mv->row < x->mv_row_min) | - (mv->row > x->mv_row_max); +static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { + return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) && + (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max); } #define CHECK_BETTER \ @@ -572,7 +570,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, for (i = 0; i < num_candidates[t]; i++) { this_mv.row = br + candidates[t][i].row; this_mv.col = bc + candidates[t][i].col; - if (check_point(x, &this_mv)) + if (!is_mv_in(x, &this_mv)) continue; this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; @@ -616,7 +614,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, for (i = 0; i < num_candidates[s]; i++) { this_mv.row = br + candidates[s][i].row; this_mv.col = bc + candidates[s][i].col; - if (check_point(x, &this_mv)) + if (!is_mv_in(x, &this_mv)) continue; this_offset = base_offset + (this_mv.row * in_what_stride) + this_mv.col; @@ -656,7 +654,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; - if (check_point(x, &this_mv)) + if (!is_mv_in(x, &this_mv)) continue; this_offset = base_offset + (this_mv.row * (in_what_stride)) + this_mv.col; @@ -695,7 +693,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, for (i = 0; i < 4; i++) { this_mv.row = br + neighbors[i].row; this_mv.col = bc + neighbors[i].col; - if (check_point(x, &this_mv)) + if (!is_mv_in(x, &this_mv)) continue; this_offset = base_offset + this_mv.row * in_what_stride + this_mv.col; @@ -1685,10 +1683,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x, this_mv.row = ref_mv->row + neighbors[j].row; this_mv.col = ref_mv->col + neighbors[j].col; - if ((this_mv.col > x->mv_col_min) && - (this_mv.col < x->mv_col_max) && - (this_mv.row > x->mv_row_min) && - (this_mv.row < x->mv_row_max)) { + if (is_mv_in(x, &this_mv)) { const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + this_mv.col]; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, @@ -1875,10 +1870,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, this_mv.row = ref_mv->row + neighbors[j].row; this_mv.col = ref_mv->col + neighbors[j].col; - if ((this_mv.col > x->mv_col_min) && - (this_mv.col < x->mv_col_max) && - (this_mv.row > x->mv_row_min) && - (this_mv.row < x->mv_row_max)) { + if (is_mv_in(x, &this_mv)) { const uint8_t *check_here = &in_what[this_mv.row * in_what_stride + this_mv.col]; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 2f1973793..58e58530d 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -572,7 +572,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, int speed) { int i; sf->adaptive_rd_thresh = 1; - sf->recode_loop = (speed < 1); + sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); if (speed == 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); sf->less_rectangular_check = 1; @@ -590,7 +590,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->adaptive_pred_interp_filter = 1; sf->auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; - sf->recode_loop = 2; + sf->recode_loop = ALLOW_RECODE_KFARFGF; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; @@ -626,7 +626,7 @@ static void set_good_speed_feature(VP9_COMMON *cm, sf->last_partitioning_redo_frequency = 3; sf->adaptive_rd_thresh = 2; - sf->recode_loop = 2; + sf->recode_loop = ALLOW_RECODE_KFARFGF; sf->use_lp32x32fdct = 1; sf->mode_skip_start = 11; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; @@ -743,7 +743,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, int speed) { sf->static_segmentation = 0; sf->adaptive_rd_thresh = 1; - sf->recode_loop = (speed < 1); + sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW); if (speed == 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); sf->less_rectangular_check = 1; @@ -761,7 +761,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->adaptive_pred_interp_filter = 1; sf->auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; - sf->recode_loop = 2; + sf->recode_loop = ALLOW_RECODE_KFARFGF; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; @@ -797,7 +797,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, sf->last_partitioning_redo_frequency = 3; sf->adaptive_rd_thresh = 2; - sf->recode_loop = 2; + sf->recode_loop = ALLOW_RECODE_KFARFGF; sf->use_lp32x32fdct = 1; sf->mode_skip_start = 11; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; @@ -846,6 +846,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, } if (speed >= 6) { sf->super_fast_rtc = 1; + sf->always_this_block_size = BLOCK_16X16; } } @@ -865,7 +866,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { // best quality defaults sf->RD = 1; sf->search_method = NSTEP; - sf->recode_loop = 1; + sf->recode_loop = ALLOW_RECODE; sf->subpel_search_method = SUBPEL_TREE; sf->subpel_iters_per_step = 2; sf->subpel_force_stop = 0; @@ -933,7 +934,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { // No recode for 1 pass. if (cpi->pass == 0) { - sf->recode_loop = 0; + sf->recode_loop = DISALLOW_RECODE; sf->optimize_coefficients = 0; } @@ -1144,6 +1145,123 @@ static int64_t rescale(int val, int64_t num, int denom) { return (llval * llnum / llden); } +// Initialize layer context data from init_config(). +static void init_layer_context(VP9_COMP *const cpi) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + int temporal_layer = 0; + cpi->svc.spatial_layer_id = 0; + cpi->svc.temporal_layer_id = 0; + for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; + ++temporal_layer) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + RATE_CONTROL *const lrc = &lc->rc; + lrc->active_worst_quality = q_trans[oxcf->worst_allowed_q]; + lrc->avg_frame_qindex[INTER_FRAME] = lrc->active_worst_quality; + lrc->last_q[INTER_FRAME] = lrc->active_worst_quality; + lrc->ni_av_qi = lrc->active_worst_quality; + lrc->total_actual_bits = 0; + lrc->total_target_vs_actual = 0; + lrc->ni_tot_qi = 0; + lrc->tot_q = 0.0; + lrc->avg_q = 0.0; + lrc->ni_frames = 0; + lrc->decimation_count = 0; + lrc->decimation_factor = 0; + lrc->rate_correction_factor = 1.0; + lrc->key_frame_rate_correction_factor = 1.0; + lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * + 1000; + lrc->buffer_level = rescale((int)(oxcf->starting_buffer_level), + lc->target_bandwidth, 1000); + lrc->bits_off_target = lrc->buffer_level; + } +} + +// Update the layer context from a change_config() call. +static void update_layer_context_change_config(VP9_COMP *const cpi, + const int target_bandwidth) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + const RATE_CONTROL *const rc = &cpi->rc; + int temporal_layer = 0; + float bitrate_alloc = 1.0; + for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; + ++temporal_layer) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + RATE_CONTROL *const lrc = &lc->rc; + lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000; + bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth; + // Update buffer-related quantities. + lc->starting_buffer_level = oxcf->starting_buffer_level * bitrate_alloc; + lc->optimal_buffer_level = oxcf->optimal_buffer_level * bitrate_alloc; + lc->maximum_buffer_size = oxcf->maximum_buffer_size * bitrate_alloc; + lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); + // Update framerate-related quantities. + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + // Update qp-related quantities. + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; + lrc->active_worst_quality = rc->active_worst_quality; + } +} + +// Prior to encoding the frame, update framerate-related quantities +// for the current layer. +static void update_layer_framerate(VP9_COMP *const cpi) { + int temporal_layer = cpi->svc.temporal_layer_id; + const VP9_CONFIG *const oxcf = &cpi->oxcf; + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + RATE_CONTROL *const lrc = &lc->rc; + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; + // Update the average layer frame size (non-cumulative per-frame-bw). + if (temporal_layer == 0) { + lc->avg_frame_size = lrc->av_per_frame_bandwidth; + } else { + double prev_layer_framerate = oxcf->framerate / + oxcf->ts_rate_decimator[temporal_layer - 1]; + int prev_layer_target_bandwidth = + oxcf->ts_target_bitrate[temporal_layer - 1] * 1000; + lc->avg_frame_size = + (int)(lc->target_bandwidth - prev_layer_target_bandwidth) / + (lc->framerate - prev_layer_framerate); + } +} + +// Prior to encoding the frame, set the layer context, for the current layer +// to be encoded, to the cpi struct. +static void restore_layer_context(VP9_COMP *const cpi) { + int temporal_layer = cpi->svc.temporal_layer_id; + LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + int frame_since_key = cpi->rc.frames_since_key; + int frame_to_key = cpi->rc.frames_to_key; + cpi->rc = lc->rc; + cpi->oxcf.target_bandwidth = lc->target_bandwidth; + cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; + cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; + cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; + cpi->output_framerate = lc->framerate; + // Reset the frames_since_key and frames_to_key counters to their values + // before the layer restore. Keep these defined for the stream (not layer). + cpi->rc.frames_since_key = frame_since_key; + cpi->rc.frames_to_key = frame_to_key; +} + +// Save the layer context after encoding the frame. +static void save_layer_context(VP9_COMP *const cpi) { + int temporal_layer = cpi->svc.temporal_layer_id; + LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + lc->rc = cpi->rc; + lc->target_bandwidth = cpi->oxcf.target_bandwidth; + lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; + lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; + lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; + lc->framerate = cpi->output_framerate; +} + static void set_tile_limits(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -1170,6 +1288,16 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cm->subsampling_y = 0; vp9_alloc_compressor_data(cpi); + // Spatial scalability. + cpi->svc.number_spatial_layers = oxcf->ss_number_layers; + // Temporal scalability. + cpi->svc.number_temporal_layers = oxcf->ts_number_layers; + + if (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + init_layer_context(cpi); + } + // change includes all joint functionality vp9_change_config(ptr, oxcf); @@ -1210,9 +1338,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->gld_fb_idx = 1; cpi->alt_fb_idx = 2; - cpi->current_layer = 0; - cpi->use_svc = 0; - set_tile_limits(cpi); cpi->fixed_divide[0] = 0; @@ -1220,7 +1345,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->fixed_divide[i] = 0x80000 / i; } - void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { VP9_COMP *cpi = (VP9_COMP *)(ptr); VP9_COMMON *const cm = &cpi->common; @@ -1312,10 +1436,10 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->oxcf.target_bandwidth, 1000); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. - if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size) { - cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size; - cpi->rc.buffer_level = cpi->rc.bits_off_target; - } + cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target, + cpi->oxcf.maximum_buffer_size); + cpi->rc.buffer_level = MIN(cpi->rc.buffer_level, + cpi->oxcf.maximum_buffer_size); // Set up frame rate and related parameters rate control values. vp9_new_framerate(cpi, cpi->oxcf.framerate); @@ -1350,6 +1474,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { } update_frame_size(cpi); + if (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + update_layer_context_change_config(cpi, cpi->oxcf.target_bandwidth); + } + cpi->speed = cpi->oxcf.cpu_used; if (cpi->oxcf.lag_in_frames == 0) { @@ -1573,6 +1702,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_create_common(cm); + cpi->use_svc = 0; + init_config((VP9_PTR)cpi, oxcf); init_pick_mode_context(cpi); @@ -1588,9 +1719,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->alt_is_last = 0; cpi->gold_is_alt = 0; - // Spatial scalability - cpi->number_spatial_layers = oxcf->ss_number_layers; - // Create the encoder segmentation map and set all entries to 0 CHECK_MEM_ERROR(cm, cpi->segmentation_map, vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); @@ -1616,11 +1744,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { sizeof(*cpi->mbgraph_stats[i].mb_stats), 1)); } -#ifdef ENTROPY_STATS - if (cpi->pass != 1) - init_context_counters(); -#endif - /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; cpi->key_frame_frequency = cpi->oxcf.key_freq; @@ -2431,8 +2554,8 @@ static int recode_loop_test(const VP9_COMP *cpi, // Is frame recode allowed. // Yes if either recode mode 1 is selected or mode 2 is selected // and the frame is a key frame, golden frame or alt_ref_frame - } else if ((cpi->sf.recode_loop == 1) || - ((cpi->sf.recode_loop == 2) && + } else if ((cpi->sf.recode_loop == ALLOW_RECODE) || + ((cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF) && (cm->frame_type == KEY_FRAME || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { // General over and under shoot tests @@ -2651,25 +2774,67 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { } #endif +static void encode_without_recode_loop(VP9_COMP *cpi, + size_t *size, + uint8_t *dest, + int q) { + VP9_COMMON *const cm = &cpi->common; + vp9_clear_system_state(); // __asm emms; + vp9_set_quantizer(cpi, q); + + // Set up entropy context depending on frame type. The decoder mandates + // the use of the default context, index 0, for keyframes and inter + // frames where the error_resilient_mode or intra_only flag is set. For + // other inter-frames the encoder currently uses only two contexts; + // context 1 for ALTREF frames and context 0 for the others. + if (cm->frame_type == KEY_FRAME) { + vp9_setup_key_frame(cpi); + } else { + if (!cm->intra_only && !cm->error_resilient_mode) { + cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; + } + vp9_setup_inter_frame(cpi); + } + // Variance adaptive and in frame q adjustment experiments are mutually + // exclusive. + if (cpi->oxcf.aq_mode == VARIANCE_AQ) { + vp9_vaq_frame_setup(cpi); + } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { + setup_in_frame_q_adj(cpi); + } + // transform / motion compensation build reconstruction frame + vp9_encode_frame(cpi); + + // Update the skip mb flag probabilities based on the distribution + // seen in the last encoder iteration. + // update_base_skip_probs(cpi); + vp9_clear_system_state(); // __asm emms; +} + static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest, - int *q, + int q, int bottom_index, - int top_index, - int frame_over_shoot_limit, - int frame_under_shoot_limit) { + int top_index) { VP9_COMMON *const cm = &cpi->common; int loop_count = 0; int loop = 0; int overshoot_seen = 0; int undershoot_seen = 0; int q_low = bottom_index, q_high = top_index; + int frame_over_shoot_limit; + int frame_under_shoot_limit; + + // Decide frame size bounds + vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target, + &frame_under_shoot_limit, + &frame_over_shoot_limit); do { vp9_clear_system_state(); // __asm emms; - vp9_set_quantizer(cpi, *q); + vp9_set_quantizer(cpi, q); if (loop_count == 0) { // Set up entropy context depending on frame type. The decoder mandates @@ -2696,7 +2861,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi, } // transform / motion compensation build reconstruction frame - vp9_encode_frame(cpi); // Update the skip mb flag probabilities based on the distribution @@ -2708,7 +2872,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // Dummy pack of the bitstream using up to date stats to get an // accurate estimate of output frame size to determine if we need // to recode. - if (cpi->sf.recode_loop != 0) { + if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { vp9_save_coding_context(cpi); cpi->dummy_packing = 1; if (!cpi->sf.super_fast_rtc) @@ -2727,7 +2891,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, if ((cm->frame_type == KEY_FRAME) && cpi->rc.this_key_frame_forced && (cpi->rc.projected_frame_size < cpi->rc.max_frame_bandwidth)) { - int last_q = *q; + int last_q = q; int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm)); int high_err_target = cpi->ambient_err; @@ -2743,32 +2907,32 @@ static void encode_with_recode_loop(VP9_COMP *cpi, (kf_err > low_err_target && cpi->rc.projected_frame_size <= frame_under_shoot_limit)) { // Lower q_high - q_high = *q > q_low ? *q - 1 : q_low; + q_high = q > q_low ? q - 1 : q_low; // Adjust Q - *q = ((*q) * high_err_target) / kf_err; - *q = MIN((*q), (q_high + q_low) >> 1); + q = (q * high_err_target) / kf_err; + q = MIN(q, (q_high + q_low) >> 1); } else if (kf_err < low_err_target && cpi->rc.projected_frame_size >= frame_under_shoot_limit) { // The key frame is much better than the previous frame // Raise q_low - q_low = *q < q_high ? *q + 1 : q_high; + q_low = q < q_high ? q + 1 : q_high; // Adjust Q - *q = ((*q) * low_err_target) / kf_err; - *q = MIN((*q), (q_high + q_low + 1) >> 1); + q = (q * low_err_target) / kf_err; + q = MIN(q, (q_high + q_low + 1) >> 1); } // Clamp Q to upper and lower limits: - *q = clamp(*q, q_low, q_high); + q = clamp(q, q_low, q_high); - loop = *q != last_q; + loop = q != last_q; } else if (recode_loop_test( cpi, frame_over_shoot_limit, frame_under_shoot_limit, - *q, MAX(q_high, top_index), bottom_index)) { + q, MAX(q_high, top_index), bottom_index)) { // Is the projected frame size out of range and are we allowed // to attempt to recode. - int last_q = *q; + int last_q = q; int retries = 0; // Frame size out of permitted range: @@ -2781,23 +2945,23 @@ static void encode_with_recode_loop(VP9_COMP *cpi, q_high = cpi->rc.worst_quality; // Raise Qlow as to at least the current value - q_low = *q < q_high ? *q + 1 : q_high; + q_low = q < q_high ? q + 1 : q_high; if (undershoot_seen || loop_count > 1) { // Update rate_correction_factor unless vp9_rc_update_rate_correction_factors(cpi, 1); - *q = (q_high + q_low + 1) / 2; + q = (q_high + q_low + 1) / 2; } else { // Update rate_correction_factor unless vp9_rc_update_rate_correction_factors(cpi, 0); - *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, + q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, bottom_index, MAX(q_high, top_index)); - while (*q < q_low && retries < 10) { + while (q < q_low && retries < 10) { vp9_rc_update_rate_correction_factors(cpi, 0); - *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, + q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, bottom_index, MAX(q_high, top_index)); retries++; } @@ -2806,27 +2970,27 @@ static void encode_with_recode_loop(VP9_COMP *cpi, overshoot_seen = 1; } else { // Frame is too small - q_high = *q > q_low ? *q - 1 : q_low; + q_high = q > q_low ? q - 1 : q_low; if (overshoot_seen || loop_count > 1) { vp9_rc_update_rate_correction_factors(cpi, 1); - *q = (q_high + q_low) / 2; + q = (q_high + q_low) / 2; } else { vp9_rc_update_rate_correction_factors(cpi, 0); - *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, + q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, bottom_index, top_index); // Special case reset for qlow for constrained quality. // This should only trigger where there is very substantial // undershoot on a frame and the auto cq level is above // the user passsed in value. if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && - *q < q_low) { - q_low = *q; + q < q_low) { + q_low = q; } - while (*q > q_high && retries < 10) { + while (q > q_high && retries < 10) { vp9_rc_update_rate_correction_factors(cpi, 0); - *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, + q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target, bottom_index, top_index); retries++; } @@ -2836,9 +3000,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, } // Clamp Q to upper and lower limits: - *q = clamp(*q, q_low, q_high); + q = clamp(q, q_low, q_high); - loop = *q != last_q; + loop = q != last_q; } else { loop = 0; } @@ -2911,20 +3075,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; TX_SIZE t; int q; - int frame_over_shoot_limit; - int frame_under_shoot_limit; int top_index; int bottom_index; SPEED_FEATURES *const sf = &cpi->sf; - unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height); + unsigned int max_mv_def = MIN(cm->width, cm->height); struct segmentation *const seg = &cm->seg; set_ext_overrides(cpi); /* Scale the source buffer, if required. */ - if (cm->mi_cols * 8 != cpi->un_scaled_source->y_width || - cm->mi_rows * 8 != cpi->un_scaled_source->y_height) { + if (cm->mi_cols * MI_SIZE != cpi->un_scaled_source->y_width || + cm->mi_rows * MI_SIZE != cpi->un_scaled_source->y_height) { scale_and_extend_frame_nonnormative(cpi->un_scaled_source, &cpi->scaled_source); cpi->Source = &cpi->scaled_source; @@ -2933,12 +3095,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } scale_references(cpi); - // Clear down mmx registers to allow floating point in what follows. vp9_clear_system_state(); - // Clear zbin over-quant value and mode boost values. - cpi->zbin_mode_boost = 0; - // Enable or disable mode based tweaking of the zbin. // For 2 pass only used where GF/ARF prediction quality // is above a threshold. @@ -2946,7 +3104,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cpi->zbin_mode_boost_enabled = 0; // Current default encoder behavior for the altref sign bias. - cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active; + cm->ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active; // Set default state for segment based loop filter update flags. cm->lf.mode_ref_delta_update = 0; @@ -2955,7 +3113,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cpi->mv_step_param = vp9_init_search_range(cpi, max_mv_def); // Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate. if (sf->auto_mv_step_size) { - if (frame_is_intra_only(&cpi->common)) { + if (frame_is_intra_only(cm)) { // Initialize max_mv_magnitude for use in the first INTER frame // after a key/intra-only frame. cpi->max_mv_magnitude = max_mv_def; @@ -2964,8 +3122,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Allow mv_steps to correspond to twice the max mv magnitude found // in the previous frame, capped by the default max_mv_magnitude based // on resolution. - cpi->mv_step_param = vp9_init_search_range( - cpi, MIN(max_mv_def, 2 * cpi->max_mv_magnitude)); + cpi->mv_step_param = vp9_init_search_range(cpi, MIN(max_mv_def, 2 * + cpi->max_mv_magnitude)); cpi->max_mv_magnitude = 0; } } @@ -3002,9 +3160,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // static regions if indicated. // Only allowed in second pass of two pass (as requires lagged coding) // and if the relevant speed feature flag is set. - if ((cpi->pass == 2) && (cpi->sf.static_segmentation)) { + if (cpi->pass == 2 && cpi->sf.static_segmentation) configure_static_seg_features(cpi); - } // For 1 pass CBR, check if we are dropping this frame. // Never drop on key frame. @@ -3013,7 +3170,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->frame_type != KEY_FRAME) { if (vp9_rc_drop_frame(cpi)) { vp9_rc_postencode_update_drop_frame(cpi); - cm->current_video_frame++; + ++cm->current_video_frame; return; } } @@ -3051,41 +3208,22 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_write_yuv_frame(cpi->Source); #endif - // Decide frame size bounds - vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target, - &frame_under_shoot_limit, - &frame_over_shoot_limit); - // Decide q and q bounds. q = vp9_rc_pick_q_and_adjust_q_bounds(cpi, &bottom_index, &top_index); - // JBB : This is realtime mode. In real time mode the first frame - // should be larger. Q of 0 is disabled because we force tx size to be - // 16x16... - if (cpi->sf.super_fast_rtc) { - if (cpi->common.current_video_frame == 0) - q /= 3; - - if (q == 0) - q++; - } - if (!frame_is_intra_only(cm)) { cm->interp_filter = DEFAULT_INTERP_FILTER; /* TODO: Decide this more intelligently */ set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH)); } - encode_with_recode_loop(cpi, - size, - dest, - &q, - bottom_index, - top_index, - frame_over_shoot_limit, - frame_under_shoot_limit); + if (cpi->sf.recode_loop == DISALLOW_RECODE) { + encode_without_recode_loop(cpi, size, dest, q); + } else { + encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index); + } // Special case code to reduce pulsing when key frames are forced at a // fixed interval. Note the reconstruction error if it is the frame before @@ -3132,37 +3270,30 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, update_reference_frames(cpi); for (t = TX_4X4; t <= TX_32X32; t++) - full_to_model_counts(cpi->common.counts.coef[t], - cpi->coef_counts[t]); - if (!cpi->common.error_resilient_mode && - !cpi->common.frame_parallel_decoding_mode) { - vp9_adapt_coef_probs(&cpi->common); - } - - if (!frame_is_intra_only(&cpi->common)) { - if (!cpi->common.error_resilient_mode && - !cpi->common.frame_parallel_decoding_mode) { - vp9_adapt_mode_probs(&cpi->common); - vp9_adapt_mv_probs(&cpi->common, cpi->common.allow_high_precision_mv); + full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]); + + if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) + vp9_adapt_coef_probs(cm); + + if (!frame_is_intra_only(cm)) { + if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { + vp9_adapt_mode_probs(cm); + vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); } } -#ifdef ENTROPY_STATS - vp9_update_mode_context_stats(cpi); -#endif - #if 0 output_frame_level_debug_stats(cpi); #endif if (cpi->refresh_golden_frame == 1) - cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN; + cm->frame_flags |= FRAMEFLAGS_GOLDEN; else - cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_GOLDEN; + cm->frame_flags &= ~FRAMEFLAGS_GOLDEN; if (cpi->refresh_alt_ref_frame == 1) - cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF; + cm->frame_flags |= FRAMEFLAGS_ALTREF; else - cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF; + cm->frame_flags &= ~FRAMEFLAGS_ALTREF; get_ref_frame_flags(cpi); @@ -3211,6 +3342,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // reset to normal state now that we are done. if (!cm->show_existing_frame) cm->last_show_frame = cm->show_frame; + if (cm->show_frame) { // current mip will be the prev_mip for the next frame MODE_INFO *temp = cm->prev_mip; @@ -3231,6 +3363,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // update not a real frame ++cm->current_video_frame; } + // restore prev_mi cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1; cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; @@ -3516,6 +3649,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, adjust_frame_rate(cpi); } + if (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + update_layer_framerate(cpi); + restore_layer_context(cpi); + } + // start with a 0 size frame *size = 0; @@ -3591,6 +3730,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cpi->droppable = !frame_is_reference(cpi); } + // Save layer specific state. + if (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + save_layer_context(cpi); + } + vpx_usec_timer_mark(&cmptimer); cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index d0ca962db..90ed606f6 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -23,6 +23,7 @@ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/encoder/vp9_encodemb.h" +#include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_lookahead.h" #include "vp9/encoder/vp9_mbgraph.h" #include "vp9/encoder/vp9_mcomp.h" @@ -44,8 +45,9 @@ extern "C" { #else #define MIN_GF_INTERVAL 4 #endif -#define DEFAULT_GF_INTERVAL 7 +#define DEFAULT_GF_INTERVAL 10 #define DEFAULT_KF_BOOST 2000 +#define DEFAULT_GF_BOOST 2000 #define KEY_FRAME_CONTEXT 5 @@ -78,28 +80,6 @@ typedef struct { FRAME_CONTEXT fc; } CODING_CONTEXT; -typedef struct { - double frame; - double intra_error; - double coded_error; - double sr_coded_error; - double ssim_weighted_pred_err; - double pcnt_inter; - double pcnt_motion; - double pcnt_second_ref; - double pcnt_neutral; - double MVr; - double mvr_abs; - double MVc; - double mvc_abs; - double MVrv; - double MVcv; - double mv_in_out_count; - double new_mv_count; - double duration; - double count; -} FIRSTPASS_STATS; - // This enumerator type needs to be kept aligned with the mode order in // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. typedef enum { @@ -217,6 +197,17 @@ typedef enum { LAST_FRAME_PARTITION_ALL = 2 } LAST_FRAME_PARTITION_METHOD; +typedef enum { + // No recode. + DISALLOW_RECODE = 0, + // Allow recode for KF and exceeding maximum frame bandwidth. + ALLOW_RECODE_KFMAXBW = 1, + // Allow recode only for KF/ARF/GF frames. + ALLOW_RECODE_KFARFGF = 2, + // Allow recode for all frames based on bitrate constraints. + ALLOW_RECODE = 3, +} RECODE_LOOP_TYPE; + typedef struct { // This flag refers to whether or not to perform rd optimization. int RD; @@ -224,11 +215,7 @@ typedef struct { // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). SEARCH_METHODS search_method; - // Recode_loop can be: - // 0 means we only encode a frame once - // 1 means we can re-encode based on bitrate constraints on any frame - // 2 means we can only recode gold, alt, and key frames. - int recode_loop; + RECODE_LOOP_TYPE recode_loop; // Subpel_search_method can only be subpel_tree which does a subpixel // logarithmic search that keeps stepping at 1/2 pixel units until @@ -407,6 +394,16 @@ typedef struct { int super_fast_rtc; } SPEED_FEATURES; +typedef struct { + RATE_CONTROL rc; + int target_bandwidth; + int64_t starting_buffer_level; + int64_t optimal_buffer_level; + int64_t maximum_buffer_size; + double framerate; + int avg_frame_size; +} LAYER_CONTEXT; + typedef struct VP9_COMP { DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); @@ -451,9 +448,6 @@ typedef struct VP9_COMP { int gld_fb_idx; int alt_fb_idx; - int current_layer; - int use_svc; - #if CONFIG_MULTIPLE_ARF int alt_ref_fb_idx[REF_FRAMES - 3]; #endif @@ -573,46 +567,7 @@ typedef struct VP9_COMP { uint64_t time_pick_lpf; uint64_t time_encode_sb_row; - struct twopass_rc { - unsigned int section_intra_rating; - unsigned int next_iiratio; - unsigned int this_iiratio; - FIRSTPASS_STATS total_stats; - FIRSTPASS_STATS this_frame_stats; - FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start; - FIRSTPASS_STATS total_left_stats; - int first_pass_done; - int64_t bits_left; - int64_t clip_bits_total; - double avg_iiratio; - double modified_error_min; - double modified_error_max; - double modified_error_total; - double modified_error_left; - double kf_intra_err_min; - double gf_intra_err_min; - int static_scene_max_gf_interval; - int kf_bits; - // Remaining error from uncoded frames in a gf group. Two pass use only - int64_t gf_group_error_left; - - // Projected total bits available for a key frame group of frames - int64_t kf_group_bits; - - // Error score of frames still to be coded in kf group - int64_t kf_group_error_left; - - // Projected Bits available for a group of frames including 1 GF or ARF - int64_t gf_group_bits; - // Bits for the golden frame or ARF - 2 pass only - int gf_bits; - int alt_extra_bits; - - int sr_update_lag; - - int kf_zeromotion_pct; - int gf_zeromotion_pct; - } twopass; + struct twopass_rc twopass; YV12_BUFFER_CONFIG alt_ref_buffer; YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS]; @@ -669,7 +624,18 @@ typedef struct VP9_COMP { int initial_width; int initial_height; - int number_spatial_layers; + int use_svc; + + struct svc { + int spatial_layer_id; + int temporal_layer_id; + int number_spatial_layers; + int number_temporal_layers; + // Layer context used for rate control in CBR mode, only defined for + // temporal layers for now. + LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; + } svc; + int enable_encode_breakout; // Default value is 1. From first pass stats, // encode_breakout may be disabled. @@ -726,8 +692,6 @@ void vp9_encode_frame(VP9_COMP *cpi); void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size); -void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x); - void vp9_set_speed_features(VP9_COMP *cpi); int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source, diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index a2eea1cd7..862573f3f 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -79,55 +79,47 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { - int i, rc, eob; - int zbins[2], nzbins[2]; - int x, y, z, sz; + const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1), + ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) }; + const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; + int idx = 0; int idx_arr[1024]; + int i, eob = -1; - vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t)); - vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t)); - - eob = -1; - - // Base ZBIN - zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1); - zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1); - nzbins[0] = zbins[0] * -1; - nzbins[1] = zbins[1] * -1; + vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t)); + vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t)); if (!skip_block) { // Pre-scan pass for (i = 0; i < n_coeffs; i++) { - rc = scan[i]; - z = coeff_ptr[rc]; + const int rc = scan[i]; + const int coeff = coeff_ptr[rc]; // If the coefficient is out of the base ZBIN range, keep it for // quantization. - if (z >= zbins[rc != 0] || z <= nzbins[rc != 0]) + if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0]) idx_arr[idx++] = i; } // Quantization pass: only process the coefficients selected in // pre-scan pass. Note: idx can be zero. for (i = 0; i < idx; i++) { - rc = scan[idx_arr[i]]; - - z = coeff_ptr[rc]; - sz = (z >> 31); // sign of z - x = (z ^ sz) - sz; // x = abs(z) - - x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); - x = clamp(x, INT16_MIN, INT16_MAX); - y = ((((x * quant_ptr[rc != 0]) >> 16) + x) * - quant_shift_ptr[rc != 0]) >> 15; // quantize (x) - - x = (y ^ sz) - sz; // get the sign back - qcoeff_ptr[rc] = x; // write to destination - dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / 2; // dequantized value - - if (y) - eob = idx_arr[i]; // last nonzero coeffs + const int rc = scan[idx_arr[i]]; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + int tmp; + int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); + abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX); + tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) * + quant_shift_ptr[rc != 0]) >> 15; + + qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; + + if (tmp) + eob = idx_arr[i]; } } *eob_ptr = eob + 1; @@ -136,8 +128,8 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs, void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan) { MACROBLOCKD *const xd = &x->e_mbd; - struct macroblock_plane* p = &x->plane[plane]; - struct macroblockd_plane* pd = &xd->plane[plane]; + struct macroblock_plane *p = &x->plane[plane]; + struct macroblockd_plane *pd = &xd->plane[plane]; vp9_quantize_b(BLOCK_OFFSET(p->coeff, block), 16, x->skip_block, @@ -223,38 +215,30 @@ void vp9_init_quantizer(VP9_COMP *cpi) { } void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { - int i; - VP9_COMMON *const cm = &cpi->common; + const VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; - int zbin_extra; - int segment_id = xd->mi_8x8[0]->mbmi.segment_id; - const int qindex = vp9_get_qindex(&cpi->common.seg, segment_id, - cpi->common.base_qindex); - - int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); + const int segment_id = xd->mi_8x8[0]->mbmi.segment_id; + const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); + const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); + const int zbin = cpi->zbin_mode_boost + x->act_zbin_adj; + int i; // Y - zbin_extra = (cpi->common.y_dequant[qindex][1] * - (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7; - x->plane[0].quant = cpi->y_quant[qindex]; x->plane[0].quant_shift = cpi->y_quant_shift[qindex]; x->plane[0].zbin = cpi->y_zbin[qindex]; x->plane[0].round = cpi->y_round[qindex]; - x->plane[0].zbin_extra = (int16_t)zbin_extra; - x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex]; + x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7); + xd->plane[0].dequant = cm->y_dequant[qindex]; // UV - zbin_extra = (cpi->common.uv_dequant[qindex][1] * - (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7; - for (i = 1; i < 3; i++) { x->plane[i].quant = cpi->uv_quant[qindex]; x->plane[i].quant_shift = cpi->uv_quant_shift[qindex]; x->plane[i].zbin = cpi->uv_zbin[qindex]; x->plane[i].round = cpi->uv_round[qindex]; - x->plane[i].zbin_extra = (int16_t)zbin_extra; - x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex]; + x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7); + xd->plane[i].dequant = cm->uv_dequant[qindex]; } #if CONFIG_ALPHA @@ -263,18 +247,14 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { x->plane[3].zbin = cpi->a_zbin[qindex]; x->plane[3].round = cpi->a_round[qindex]; x->plane[3].zbin_extra = (int16_t)zbin_extra; - x->e_mbd.plane[3].dequant = cpi->common.a_dequant[qindex]; + xd->plane[3].dequant = cm->a_dequant[qindex]; #endif - x->skip_block = vp9_segfeature_active(&cpi->common.seg, segment_id, - SEG_LVL_SKIP); - - /* save this macroblock QIndex for vp9_update_zbin_extra() */ + x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); x->q_index = qindex; - /* R/D setup */ - cpi->mb.errorperbit = rdmult >> 6; - cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); + x->errorperbit = rdmult >> 6; + x->errorperbit += (x->errorperbit == 0); vp9_initialize_me_consts(cpi, x->q_index); } diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index abbf39b81..b3e9f4538 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -215,12 +215,12 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { rc->av_per_frame_bandwidth >> 5); if (target < min_frame_target) target = min_frame_target; - if (cpi->refresh_golden_frame && rc->source_alt_ref_active) { + if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) { // If there is an active ARF at this location use the minimum // bits on this frame even if it is a constructed arf. // The active maximum quantizer insures that an appropriate // number of bits will be spent if needed for constructed ARFs. - target = 0; + target = min_frame_target; } // Clip the frame target to the maximum allowed value. if (target > rc->max_frame_bandwidth) @@ -241,6 +241,26 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { return target; } + +// Update the buffer level for higher layers, given the encoded current layer. +static void update_layer_buffer_level(VP9_COMP *const cpi, + int encoded_frame_size) { + int temporal_layer = 0; + int current_temporal_layer = cpi->svc.temporal_layer_id; + for (temporal_layer = current_temporal_layer + 1; + temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) { + LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + RATE_CONTROL *lrc = &lc->rc; + int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - + encoded_frame_size); + lrc->bits_off_target += bits_off_for_this_layer; + + // Clip buffer level to maximum buffer size for the layer. + lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); + lrc->buffer_level = lrc->bits_off_target; + } +} + // Update the buffer level: leaky bucket model. static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { const VP9_COMMON *const cm = &cpi->common; @@ -255,14 +275,18 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { } // Clip the buffer level to the maximum specified buffer size. - rc->buffer_level = MIN(rc->bits_off_target, oxcf->maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size); + rc->buffer_level = rc->bits_off_target; + + if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + update_layer_buffer_level(cpi, encoded_frame_size); + } } int vp9_rc_drop_frame(VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; - if (!oxcf->drop_frames_water_mark) { return 0; } else { @@ -273,7 +297,7 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) { // If buffer is below drop_mark, for now just drop every other frame // (starting with the next frame) until it increases back over drop_mark. int drop_mark = (int)(oxcf->drop_frames_water_mark * - oxcf->optimal_buffer_level / 100); + oxcf->optimal_buffer_level / 100); if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) { --rc->decimation_factor; @@ -301,7 +325,8 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { if (cpi->common.frame_type == KEY_FRAME) { return cpi->rc.key_frame_rate_correction_factor; } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) + if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) return cpi->rc.gf_rate_correction_factor; else return cpi->rc.rate_correction_factor; @@ -312,7 +337,8 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { if (cpi->common.frame_type == KEY_FRAME) { cpi->rc.key_frame_rate_correction_factor = factor; } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) + if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) cpi->rc.gf_rate_correction_factor = factor; else cpi->rc.rate_correction_factor = factor; @@ -461,8 +487,7 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi, double q_adj_factor = 1.0; double q_val; - // Baseline value derived from cpi->active_worst_quality and kf boost - active_best_quality = get_active_quality(active_worst_quality, + active_best_quality = get_active_quality(rc->avg_frame_qindex[KEY_FRAME], rc->kf_boost, kf_low, kf_high, kf_low_motion_minq, @@ -495,7 +520,8 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi, rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) { q = rc->avg_frame_qindex[INTER_FRAME]; } else { - q = active_worst_quality; + q = (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? + active_worst_quality : rc->avg_frame_qindex[KEY_FRAME]; } // For constrained quality dont allow Q less than the cq level if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) { @@ -538,7 +564,25 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi, if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { active_best_quality = cpi->cq_target_quality; } else { - active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; + // Use the lower of active_worst_quality and recent/average Q. + if (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + if (cm->current_video_frame > 1) { + if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) + active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; + else + active_best_quality = inter_minq[active_worst_quality]; + } else { + if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality) + active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]]; + else + active_best_quality = inter_minq[active_worst_quality]; + } + } else { + if (cm->current_video_frame > 1) + active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; + else + active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]]; + } // For the constrained quality mode we don't want // q to fall below the cq level. if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) && @@ -574,7 +618,6 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi, *top_index = (active_worst_quality + active_best_quality) / 2; } #endif - if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { q = active_best_quality; // Special case code to try and match quality with forced key frames @@ -810,12 +853,28 @@ static int rc_pick_q_and_adjust_q_bounds_two_pass(const VP9_COMP *cpi, int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi, int *bottom_index, int *top_index) { + int q; if (cpi->pass == 0) - return rc_pick_q_and_adjust_q_bounds_one_pass( + q = rc_pick_q_and_adjust_q_bounds_one_pass( cpi, bottom_index, top_index); else - return rc_pick_q_and_adjust_q_bounds_two_pass( + q = rc_pick_q_and_adjust_q_bounds_two_pass( cpi, bottom_index, top_index); + + // JBB : This is realtime mode. In real time mode the first frame + // should be larger. Q of 0 is disabled because we force tx size to be + // 16x16... + if (cpi->sf.super_fast_rtc) { + if (cpi->common.current_video_frame == 0) + q /= 3; + if (q == 0) + q++; + if (q < *bottom_index) + *bottom_index = q; + else if (q > *top_index) + *top_index = q; + } + return q; } void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, @@ -918,7 +977,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->projected_frame_size = (bytes_used << 3); // Post encode loop adjustment of Q prediction. - vp9_rc_update_rate_correction_factors(cpi, (cpi->sf.recode_loop || + vp9_rc_update_rate_correction_factors( + cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF || cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); // Keep a record of last Q and ambient average Q. @@ -927,7 +987,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO( 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2); } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && + !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) { rc->last_q[2] = cm->base_qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO( 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2); @@ -1002,21 +1063,6 @@ void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { cpi->rc.frames_to_key--; } -void vp9_rc_get_svc_params(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if ((cm->current_video_frame == 0) || - (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % - cpi->key_frame_frequency == 0))) { - cm->frame_type = KEY_FRAME; - cpi->rc.source_alt_ref_active = 0; - } else { - cm->frame_type = INTER_FRAME; - } - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; -} - static int test_for_kf_one_pass(VP9_COMP *cpi) { // Placeholder function for auto key frame return 0; @@ -1025,62 +1071,88 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) { #define USE_ALTREF_FOR_ONE_PASS 1 static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { + static const int af_ratio = 10; const RATE_CONTROL *rc = &cpi->rc; - int target = rc->av_per_frame_bandwidth; - target = vp9_rc_clamp_pframe_target_size(cpi, target); - return target; + int target; +#if USE_ALTREF_FOR_ONE_PASS + target = (!rc->is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ? + (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval * af_ratio) / + (cpi->rc.baseline_gf_interval + af_ratio - 1) : + (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval) / + (cpi->rc.baseline_gf_interval + af_ratio - 1); +#else + target = rc->av_per_frame_bandwidth; +#endif + return vp9_rc_clamp_pframe_target_size(cpi, target); } static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { + static const int kf_ratio = 25; const RATE_CONTROL *rc = &cpi->rc; - int target = rc->av_per_frame_bandwidth * 8; - target = vp9_rc_clamp_iframe_target_size(cpi, target); - return target; + int target = rc->av_per_frame_bandwidth * kf_ratio; + return vp9_rc_clamp_iframe_target_size(cpi, target); +} + +static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) { + int active_worst_quality; + if (cpi->common.frame_type == KEY_FRAME) { + if (cpi->common.current_video_frame == 0) { + active_worst_quality = cpi->rc.worst_quality; + } else { + // Choose active worst quality twice as large as the last q. + active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; + } + } else if (!cpi->rc.is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + if (cpi->common.current_video_frame == 1) { + active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 5 / 4; + } else { + // Choose active worst quality twice as large as the last q. + active_worst_quality = cpi->rc.last_q[INTER_FRAME]; + } + } else { + if (cpi->common.current_video_frame == 1) { + active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; + } else { + // Choose active worst quality twice as large as the last q. + active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2; + } + } + if (active_worst_quality > cpi->rc.worst_quality) + active_worst_quality = cpi->rc.worst_quality; + return active_worst_quality; } void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; int target; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || cm->frame_flags & FRAMEFLAGS_KEY || - cpi->rc.frames_to_key == 0 || + rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; - cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && - cpi->rc.frames_to_key == 0; - cpi->rc.frames_to_key = cpi->key_frame_frequency; - cpi->rc.kf_boost = DEFAULT_KF_BOOST; - cpi->rc.source_alt_ref_active = 0; - if (cm->current_video_frame == 0) { - cpi->rc.active_worst_quality = cpi->rc.worst_quality; - } else { - // Choose active worst quality twice as large as the last q. - cpi->rc.active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2; - if (cpi->rc.active_worst_quality > cpi->rc.worst_quality) - cpi->rc.active_worst_quality = cpi->rc.worst_quality; - } + rc->this_key_frame_forced = cm->current_video_frame != 0 && + rc->frames_to_key == 0; + rc->frames_to_key = cpi->key_frame_frequency; + rc->kf_boost = DEFAULT_KF_BOOST; + rc->source_alt_ref_active = 0; } else { cm->frame_type = INTER_FRAME; - if (cm->current_video_frame == 1) { - cpi->rc.active_worst_quality = cpi->rc.worst_quality; - } else { - // Choose active worst quality twice as large as the last q. - cpi->rc.active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2; - if (cpi->rc.active_worst_quality > cpi->rc.worst_quality) - cpi->rc.active_worst_quality = cpi->rc.worst_quality; - } } - if (cpi->rc.frames_till_gf_update_due == 0) { - cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL; - cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval; + if (rc->frames_till_gf_update_due == 0) { + rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; + rc->frames_till_gf_update_due = rc->baseline_gf_interval; // NOTE: frames_till_gf_update_due must be <= frames_to_key. - if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key) - cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key; + if (rc->frames_till_gf_update_due > rc->frames_to_key) + rc->frames_till_gf_update_due = rc->frames_to_key; cpi->refresh_golden_frame = 1; - cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS; - cpi->rc.gfu_boost = 2000; + rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS; + rc->gfu_boost = DEFAULT_GF_BOOST; } + cpi->rc.active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi); if (cm->frame_type == KEY_FRAME) target = calc_iframe_target_size_one_pass_vbr(cpi); else @@ -1099,13 +1171,15 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { const RATE_CONTROL *rc = &cpi->rc; int active_worst_quality = rc->active_worst_quality; // Maximum limit for down adjustment, ~20%. - int max_adjustment_down = active_worst_quality / 5; // Buffer level below which we push active_worst to worst_quality. int critical_level = oxcf->optimal_buffer_level >> 2; int adjustment = 0; int buff_lvl_step = 0; + if (cpi->common.frame_type == KEY_FRAME) + return rc->worst_quality; if (rc->buffer_level > oxcf->optimal_buffer_level) { // Adjust down. + int max_adjustment_down = active_worst_quality / 5; if (max_adjustment_down) { buff_lvl_step = (int)((oxcf->maximum_buffer_size - oxcf->optimal_buffer_level) / max_adjustment_down); @@ -1135,11 +1209,20 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; - int target = rc->av_per_frame_bandwidth; - const int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, - FRAME_OVERHEAD_BITS); const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100; + int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4, + FRAME_OVERHEAD_BITS); + int target = rc->av_per_frame_bandwidth; + if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + // Note that for layers, av_per_frame_bandwidth is the cumulative + // per-frame-bandwidth. For the target size of this frame, use the + // layer average frame size (i.e., non-cumulative per-frame-bw). + int current_temporal_layer = cpi->svc.temporal_layer_id; + const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer]; + target = lc->avg_frame_size; + min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); + } if (diff > 0) { // Lower the target bandwidth for this frame. const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct); @@ -1149,51 +1232,73 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const int pct_high = MIN(-diff / one_pct_bits, oxcf->over_shoot_pct); target += (target * pct_high) / 200; } - if (target < min_frame_target) - target = min_frame_target; - return target; + return MAX(min_frame_target, target); } static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { - int target; const RATE_CONTROL *rc = &cpi->rc; + if (cpi->common.current_video_frame == 0) { - target = cpi->oxcf.starting_buffer_level / 2; + return cpi->oxcf.starting_buffer_level / 2; } else { - int initial_boost = 32; + const int initial_boost = 32; int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16)); if (rc->frames_since_key < cpi->output_framerate / 2) { kf_boost = (int)(kf_boost * rc->frames_since_key / (cpi->output_framerate / 2)); } - target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4; + return ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4; } - return target; +} + +void vp9_rc_get_svc_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + int target = cpi->rc.av_per_frame_bandwidth; + if ((cm->current_video_frame == 0) || + (cm->frame_flags & FRAMEFLAGS_KEY) || + (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % + cpi->key_frame_frequency == 0))) { + cm->frame_type = KEY_FRAME; + cpi->rc.source_alt_ref_active = 0; + if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + target = calc_iframe_target_size_one_pass_cbr(cpi); + } + } else { + cm->frame_type = INTER_FRAME; + if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + target = calc_pframe_target_size_one_pass_cbr(cpi); + } + } + cpi->rc.active_worst_quality = + calc_active_worst_quality_one_pass_cbr(cpi); + vp9_rc_set_frame_target(cpi, target); + cpi->rc.frames_till_gf_update_due = INT_MAX; + cpi->rc.baseline_gf_interval = INT_MAX; } void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; + RATE_CONTROL *const rc = &cpi->rc; int target; if ((cm->current_video_frame == 0 || cm->frame_flags & FRAMEFLAGS_KEY || - cpi->rc.frames_to_key == 0 || + rc->frames_to_key == 0 || (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) { cm->frame_type = KEY_FRAME; - cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 && - cpi->rc.frames_to_key == 0; - cpi->rc.frames_to_key = cpi->key_frame_frequency; - cpi->rc.kf_boost = DEFAULT_KF_BOOST; - cpi->rc.source_alt_ref_active = 0; + rc->this_key_frame_forced = cm->current_video_frame != 0 && + rc->frames_to_key == 0; + rc->frames_to_key = cpi->key_frame_frequency; + rc->kf_boost = DEFAULT_KF_BOOST; + rc->source_alt_ref_active = 0; target = calc_iframe_target_size_one_pass_cbr(cpi); - cpi->rc.active_worst_quality = cpi->rc.worst_quality; } else { cm->frame_type = INTER_FRAME; target = calc_pframe_target_size_one_pass_cbr(cpi); - cpi->rc.active_worst_quality = - calc_active_worst_quality_one_pass_cbr(cpi); } + cpi->rc.active_worst_quality = + calc_active_worst_quality_one_pass_cbr(cpi); vp9_rc_set_frame_target(cpi, target); // Don't use gf_update by default in CBR mode. - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; + rc->frames_till_gf_update_due = INT_MAX; + rc->baseline_gf_interval = INT_MAX; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e5230feb4..fcfab2a41 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1064,7 +1064,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, so = &vp9_scan_orders[TX_4X4][tx_type]; if (tx_type != DCT_DCT) - vp9_short_fht4x4(src_diff, coeff, 8, tx_type); + vp9_fht4x4(src_diff, coeff, 8, tx_type); else x->fwd_txm4x4(src_diff, coeff, 8); @@ -1645,14 +1645,15 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi_buf, int filter_idx, int_mv seg_mvs[4][MAX_REF_FRAMES], int mi_row, int mi_col) { - int i, br = 0, idx, idy; + int k, br = 0, idx, idy; int64_t bd = 0, block_sse = 0; MB_PREDICTION_MODE this_mode; + MACROBLOCKD *xd = &x->e_mbd; VP9_COMMON *cm = &cpi->common; - MODE_INFO *mi = x->e_mbd.mi_8x8[0]; + MODE_INFO *mi = xd->mi_8x8[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; const int label_count = 4; int64_t this_segment_rd = 0; int label_mv_thresh; @@ -1660,7 +1661,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize = mbmi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - vp9_variance_fn_ptr_t *v_fn_ptr; + vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize]; ENTROPY_CONTEXT t_above[2], t_left[2]; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; int mode_idx; @@ -1670,8 +1671,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(t_above, pd->above_context, sizeof(t_above)); vpx_memcpy(t_left, pd->left_context, sizeof(t_left)); - v_fn_ptr = &cpi->fn_ptr[bsize]; - // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on // segments. setting this to 1 would make mv thresh @@ -1687,20 +1686,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; MB_PREDICTION_MODE mode_selected = ZEROMV; int64_t best_rd = INT64_MAX; - i = idy * 2 + idx; - - frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0; - vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile, - i, 0, mi_row, mi_col, - &frame_mv[NEARESTMV][mbmi->ref_frame[0]], - &frame_mv[NEARMV][mbmi->ref_frame[0]]); - if (has_second_rf) { - frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0; - vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile, - i, 1, mi_row, mi_col, - &frame_mv[NEARESTMV][mbmi->ref_frame[1]], - &frame_mv[NEARMV][mbmi->ref_frame[1]]); + const int i = idy * 2 + idx; + int ref; + + for (ref = 0; ref < 1 + has_second_rf; ++ref) { + const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; + frame_mv[ZEROMV][frame].as_int = 0; + vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col, + &frame_mv[NEARESTMV][frame], + &frame_mv[NEARMV][frame]); } + // search for the best motion vector on this segment for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { const struct buf_2d orig_src = x->plane[0].src; @@ -2042,8 +2038,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, bsi->sse = block_sse; // update the coding decisions - for (i = 0; i < 4; ++i) - bsi->modes[i] = mi->bmi[i].as_mode; + for (k = 0; k < 4; ++k) + bsi->modes[k] = mi->bmi[k].as_mode; } static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad.c index 58c5df47e..58c5df47e 100644 --- a/vp9/encoder/vp9_sad_c.c +++ b/vp9/encoder/vp9_sad.c diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c index ea031fb07..2b82d9750 100644 --- a/vp9/encoder/x86/vp9_dct_avx2.c +++ b/vp9/encoder/x86/vp9_dct_avx2.c @@ -244,32 +244,36 @@ void fadst4_avx2(__m128i *in) { transpose_4x4_avx2(in); } -void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht4x4_avx2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in[4]; - load_buffer_4x4_avx2(input, in, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct4_avx2(in); - fdct4_avx2(in); + case DCT_DCT: + vp9_fdct4x4_avx2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_4x4_avx2(input, in, stride); fadst4_avx2(in); fdct4_avx2(in); + write_buffer_4x4_avx2(output, in); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_4x4_avx2(input, in, stride); fdct4_avx2(in); fadst4_avx2(in); + write_buffer_4x4_avx2(output, in); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_4x4_avx2(input, in, stride); fadst4_avx2(in); fadst4_avx2(in); + write_buffer_4x4_avx2(output, in); break; default: assert(0); break; } - write_buffer_4x4_avx2(output, in); } void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) { @@ -1028,33 +1032,39 @@ void fadst8_avx2(__m128i *in) { array_transpose_8x8_avx2(in, in); } -void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht8x8_avx2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in[8]; - load_buffer_8x8_avx2(input, in, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct8_avx2(in); - fdct8_avx2(in); + case DCT_DCT: + vp9_fdct8x8_avx2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_8x8_avx2(input, in, stride); fadst8_avx2(in); fdct8_avx2(in); + right_shift_8x8_avx2(in, 1); + write_buffer_8x8_avx2(output, in, 8); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_8x8_avx2(input, in, stride); fdct8_avx2(in); fadst8_avx2(in); + right_shift_8x8_avx2(in, 1); + write_buffer_8x8_avx2(output, in, 8); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_8x8_avx2(input, in, stride); fadst8_avx2(in); fadst8_avx2(in); + right_shift_8x8_avx2(in, 1); + write_buffer_8x8_avx2(output, in, 8); break; default: assert(0); break; } - right_shift_8x8_avx2(in, 1); - write_buffer_8x8_avx2(output, in, 8); } void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) { @@ -2534,36 +2544,39 @@ void fadst16_avx2(__m128i *in0, __m128i *in1) { array_transpose_16x16_avx2(in0, in1); } -void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht16x16_avx2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in0[16], in1[16]; - load_buffer_16x16_avx2(input, in0, in1, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct16_avx2(in0, in1); - right_shift_16x16_avx2(in0, in1); - fdct16_avx2(in0, in1); + case DCT_DCT: + vp9_fdct16x16_avx2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_16x16_avx2(input, in0, in1, stride); fadst16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); fdct16_avx2(in0, in1); + write_buffer_16x16_avx2(output, in0, in1, 16); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_16x16_avx2(input, in0, in1, stride); fdct16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); fadst16_avx2(in0, in1); + write_buffer_16x16_avx2(output, in0, in1, 16); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_16x16_avx2(input, in0, in1, stride); fadst16_avx2(in0, in1); right_shift_16x16_avx2(in0, in1); fadst16_avx2(in0, in1); + write_buffer_16x16_avx2(output, in0, in1, 16); break; default: assert(0); break; } - write_buffer_16x16_avx2(output, in0, in1, 16); } #define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2 diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index c876cc273..852cf8667 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -242,32 +242,36 @@ void fadst4_sse2(__m128i *in) { transpose_4x4(in); } -void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht4x4_sse2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in[4]; - load_buffer_4x4(input, in, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct4_sse2(in); - fdct4_sse2(in); + case DCT_DCT: + vp9_fdct4x4_sse2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_4x4(input, in, stride); fadst4_sse2(in); fdct4_sse2(in); + write_buffer_4x4(output, in); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_4x4(input, in, stride); fdct4_sse2(in); fadst4_sse2(in); + write_buffer_4x4(output, in); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_4x4(input, in, stride); fadst4_sse2(in); fadst4_sse2(in); + write_buffer_4x4(output, in); break; - default: - assert(0); - break; + default: + assert(0); + break; } - write_buffer_4x4(output, in); } void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) { @@ -1026,33 +1030,39 @@ void fadst8_sse2(__m128i *in) { array_transpose_8x8(in, in); } -void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht8x8_sse2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in[8]; - load_buffer_8x8(input, in, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct8_sse2(in); - fdct8_sse2(in); + case DCT_DCT: + vp9_fdct8x8_sse2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_8x8(input, in, stride); fadst8_sse2(in); fdct8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_8x8(input, in, stride); fdct8_sse2(in); fadst8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_8x8(input, in, stride); fadst8_sse2(in); fadst8_sse2(in); + right_shift_8x8(in, 1); + write_buffer_8x8(output, in, 8); break; default: assert(0); break; } - right_shift_8x8(in, 1); - write_buffer_8x8(output, in, 8); } void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) { @@ -2532,36 +2542,39 @@ void fadst16_sse2(__m128i *in0, __m128i *in1) { array_transpose_16x16(in0, in1); } -void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output, - int stride, int tx_type) { +void vp9_fht16x16_sse2(const int16_t *input, int16_t *output, + int stride, int tx_type) { __m128i in0[16], in1[16]; - load_buffer_16x16(input, in0, in1, stride); + switch (tx_type) { - case 0: // DCT_DCT - fdct16_sse2(in0, in1); - right_shift_16x16(in0, in1); - fdct16_sse2(in0, in1); + case DCT_DCT: + vp9_fdct16x16_sse2(input, output, stride); break; - case 1: // ADST_DCT + case ADST_DCT: + load_buffer_16x16(input, in0, in1, stride); fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); fdct16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); break; - case 2: // DCT_ADST + case DCT_ADST: + load_buffer_16x16(input, in0, in1, stride); fdct16_sse2(in0, in1); right_shift_16x16(in0, in1); fadst16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); break; - case 3: // ADST_ADST + case ADST_ADST: + load_buffer_16x16(input, in0, in1, stride); fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); fadst16_sse2(in0, in1); + write_buffer_16x16(output, in0, in1, 16); break; default: assert(0); break; } - write_buffer_16x16(output, in0, in1, 16); } #define FDCT32x32_2D vp9_fdct32x32_rd_sse2 diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm index db306603b..48ccef8cc 100644 --- a/vp9/encoder/x86/vp9_quantize_ssse3.asm +++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm @@ -188,7 +188,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ pmaxsw m8, m7 pshuflw m7, m8, 0x1 pmaxsw m8, m7 - pextrw [r2], m8, 0 + pextrw r6, m8, 0 + mov [r2], r6 RET ; skip-block, i.e. just write all zeroes @@ -214,5 +215,5 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ %endmacro INIT_XMM ssse3 -QUANTIZE_FN b, 6 +QUANTIZE_FN b, 7 QUANTIZE_FN b_32x32, 7 |