path: root/vp9/encoder
Diffstat (limited to 'vp9/encoder')
-rw-r--r--   vp9/encoder/vp9_dct.c | 179
-rw-r--r--   vp9/encoder/vp9_dct.h | 32
-rw-r--r--   vp9/encoder/vp9_encodeframe.c | 249
-rw-r--r--   vp9/encoder/vp9_encodemb.c | 7
-rw-r--r--   vp9/encoder/vp9_firstpass.c | 8
-rw-r--r--   vp9/encoder/vp9_firstpass.h | 82
-rw-r--r--   vp9/encoder/vp9_lookahead.c | 3
-rw-r--r--   vp9/encoder/vp9_mcomp.c | 26
-rw-r--r--   vp9/encoder/vp9_onyx_if.c | 381
-rw-r--r--   vp9/encoder/vp9_onyx_int.h | 112
-rw-r--r--   vp9/encoder/vp9_quantize.c | 102
-rw-r--r--   vp9/encoder/vp9_ratectrl.c | 283
-rw-r--r--   vp9/encoder/vp9_rdopt.c | 40
-rw-r--r--   vp9/encoder/vp9_sad.c (renamed from vp9/encoder/vp9_sad_c.c) | 0
-rw-r--r--   vp9/encoder/x86/vp9_dct_avx2.c | 77
-rw-r--r--   vp9/encoder/x86/vp9_dct_sse2.c | 83
-rw-r--r--   vp9/encoder/x86/vp9_quantize_ssse3.asm | 5
17 files changed, 894 insertions, 775 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index a840b480a..a9d168cc8 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -18,8 +18,6 @@
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h"
-#include "vp9/encoder/vp9_dct.h"
-
static INLINE int fdct_round_shift(int input) {
int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
assert(INT16_MIN <= rv && rv <= INT16_MAX);
@@ -157,32 +155,36 @@ static const transform_2d FHT_4[] = {
{ fadst4, fadst4 } // ADST_ADST = 3
};
-void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
- int16_t out[4 * 4];
- int16_t *outptr = &out[0];
- int i, j;
- int16_t temp_in[4], temp_out[4];
- const transform_2d ht = FHT_4[tx_type];
+void vp9_fht4x4_c(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
+ if (tx_type == DCT_DCT) {
+ vp9_fdct4x4_c(input, output, stride);
+ } else {
+ int16_t out[4 * 4];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[4], temp_out[4];
+ const transform_2d ht = FHT_4[tx_type];
- // Columns
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = input[j * stride + i] * 16;
- if (i == 0 && temp_in[0])
- temp_in[0] += 1;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- outptr[j * 4 + i] = temp_out[j];
- }
+ // Columns
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = input[j * stride + i] * 16;
+ if (i == 0 && temp_in[0])
+ temp_in[0] += 1;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ outptr[j * 4 + i] = temp_out[j];
+ }
- // Rows
- for (i = 0; i < 4; ++i) {
- for (j = 0; j < 4; ++j)
- temp_in[j] = out[j + i * 4];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 4; ++j)
- output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ // Rows
+ for (i = 0; i < 4; ++i) {
+ for (j = 0; j < 4; ++j)
+ temp_in[j] = out[j + i * 4];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 4; ++j)
+ output[j + i * 4] = (temp_out[j] + 1) >> 2;
+ }
}
}
@@ -565,30 +567,34 @@ static const transform_2d FHT_8[] = {
{ fadst8, fadst8 } // ADST_ADST = 3
};
-void vp9_short_fht8x8_c(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
- int16_t out[64];
- int16_t *outptr = &out[0];
- int i, j;
- int16_t temp_in[8], temp_out[8];
- const transform_2d ht = FHT_8[tx_type];
-
- // Columns
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- outptr[j * 8 + i] = temp_out[j];
- }
+void vp9_fht8x8_c(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
+ if (tx_type == DCT_DCT) {
+ vp9_fdct8x8_c(input, output, stride);
+ } else {
+ int16_t out[64];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[8], temp_out[8];
+ const transform_2d ht = FHT_8[tx_type];
+
+ // Columns
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = input[j * stride + i] * 4;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ outptr[j * 8 + i] = temp_out[j];
+ }
- // Rows
- for (i = 0; i < 8; ++i) {
- for (j = 0; j < 8; ++j)
- temp_in[j] = out[j + i * 8];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 8; ++j)
- output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+ // Rows
+ for (i = 0; i < 8; ++i) {
+ for (j = 0; j < 8; ++j)
+ temp_in[j] = out[j + i * 8];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 8; ++j)
+ output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+ }
}
}
@@ -958,31 +964,34 @@ static const transform_2d FHT_16[] = {
{ fadst16, fadst16 } // ADST_ADST = 3
};
-void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
- int16_t out[256];
- int16_t *outptr = &out[0];
- int i, j;
- int16_t temp_in[16], temp_out[16];
- const transform_2d ht = FHT_16[tx_type];
-
- // Columns
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = input[j * stride + i] * 4;
- ht.cols(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
-// outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
- }
+void vp9_fht16x16_c(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
+ if (tx_type == DCT_DCT) {
+ vp9_fdct16x16_c(input, output, stride);
+ } else {
+ int16_t out[256];
+ int16_t *outptr = &out[0];
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+ const transform_2d ht = FHT_16[tx_type];
+
+ // Columns
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = input[j * stride + i] * 4;
+ ht.cols(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
+ }
- // Rows
- for (i = 0; i < 16; ++i) {
- for (j = 0; j < 16; ++j)
- temp_in[j] = out[j + i * 16];
- ht.rows(temp_in, temp_out);
- for (j = 0; j < 16; ++j)
- output[j + i * 16] = temp_out[j];
+ // Rows
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j + i * 16];
+ ht.rows(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ output[j + i * 16] = temp_out[j];
+ }
}
}
@@ -1375,27 +1384,3 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
out[j + i * 32] = temp_out[j];
}
}
-
-void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride) {
- if (tx_type == DCT_DCT)
- vp9_fdct4x4(input, output, stride);
- else
- vp9_short_fht4x4(input, output, stride, tx_type);
-}
-
-void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride) {
- if (tx_type == DCT_DCT)
- vp9_fdct8x8(input, output, stride);
- else
- vp9_short_fht8x8(input, output, stride, tx_type);
-}
-
-void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride) {
- if (tx_type == DCT_DCT)
- vp9_fdct16x16(input, output, stride);
- else
- vp9_short_fht16x16(input, output, stride, tx_type);
-}
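
For orientation: after this change the hybrid-transform entry points take tx_type as the last argument and handle the DCT_DCT case internally, which is why the vp9_fht* wrappers above and vp9_dct.h below can be removed. A minimal caller-side sketch; the buffers, sizes and the chosen tx_type here are illustrative only, not taken from the patch:

  /* Sketch only: src_diff is a residual block produced elsewhere.
   * Per the hunks above, tx_type is now the last argument and DCT_DCT
   * falls through to vp9_fdct16x16_c inside vp9_fht16x16_c itself. */
  int16_t src_diff[16 * 16];
  int16_t coeff[16 * 16];
  const int diff_stride = 16;
  const TX_TYPE tx_type = ADST_DCT;  /* DCT_DCT, ADST_DCT, DCT_ADST or ADST_ADST */

  vp9_fht16x16_c(src_diff, coeff, diff_stride, tx_type);
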
diff --git a/vp9/encoder/vp9_dct.h b/vp9/encoder/vp9_dct.h
deleted file mode 100644
index cf5f001a9..000000000
--- a/vp9/encoder/vp9_dct.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_VP9_DCT_H_
-#define VP9_ENCODER_VP9_DCT_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride);
-
-void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride);
-
-void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
- int stride);
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_ENCODER_VP9_DCT_H_
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index eaedf1e07..442170f0c 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -319,7 +319,7 @@ static void build_activity_map(VP9_COMP *cpi) {
}
// Macroblock activity masking
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
+static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
#if USE_ACT_INDEX
x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
@@ -491,24 +491,26 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
} else {
// Note how often each mode chosen as best
cpi->mode_chosen_counts[mb_mode_index]++;
- if (is_inter_block(mbmi) &&
- (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
- int_mv best_mv[2];
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
- best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
- vp9_update_mv_count(cpi, x, best_mv);
- }
- if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
- const int ctx = vp9_get_pred_context_switchable_interp(xd);
- ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ if (is_inter_block(mbmi)) {
+ if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
+ int_mv best_mv[2];
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+ best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
+ }
+
+ if (cm->interp_filter == SWITCHABLE) {
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ }
}
cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
}
}
@@ -671,7 +673,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
}
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
- vp9_activity_masking(cpi, x);
+ activity_masking(cpi, x);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state(); // __asm emms;
@@ -711,36 +713,40 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
static void update_stats(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- MODE_INFO *mi = xd->mi_8x8[0];
- MB_MODE_INFO *const mbmi = &mi->mbmi;
+ const MACROBLOCK *const x = &cpi->mb;
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MODE_INFO *const mi = xd->mi_8x8[0];
+ const MB_MODE_INFO *const mbmi = &mi->mbmi;
if (!frame_is_intra_only(cm)) {
const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
SEG_LVL_REF_FRAME);
+ if (!seg_ref_active) {
+ FRAME_COUNTS *const counts = &cm->counts;
+ const int inter_block = is_inter_block(mbmi);
- if (!seg_ref_active)
- cm->counts.intra_inter[vp9_get_intra_inter_context(xd)]
- [is_inter_block(mbmi)]++;
-
- // If the segment reference feature is enabled we have only a single
- // reference frame allowed for the segment so exclude it from
- // the reference frame counts used to work out probabilities.
- if (is_inter_block(mbmi) && !seg_ref_active) {
- if (cm->reference_mode == REFERENCE_MODE_SELECT)
- cm->counts.comp_inter[vp9_get_reference_mode_context(cm, xd)]
- [has_second_ref(mbmi)]++;
-
- if (has_second_ref(mbmi)) {
- cm->counts.comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
- [mbmi->ref_frame[0] == GOLDEN_FRAME]++;
- } else {
- cm->counts.single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
- [mbmi->ref_frame[0] != LAST_FRAME]++;
- if (mbmi->ref_frame[0] != LAST_FRAME)
- cm->counts.single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
- [mbmi->ref_frame[0] != GOLDEN_FRAME]++;
+ counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++;
+
+ // If the segment reference feature is enabled we have only a single
+ // reference frame allowed for the segment so exclude it from
+ // the reference frame counts used to work out probabilities.
+ if (inter_block) {
+ const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
+
+ if (cm->reference_mode == REFERENCE_MODE_SELECT)
+ counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
+ [has_second_ref(mbmi)]++;
+
+ if (has_second_ref(mbmi)) {
+ counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
+ [ref0 == GOLDEN_FRAME]++;
+ } else {
+ counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
+ [ref0 != LAST_FRAME]++;
+ if (ref0 != LAST_FRAME)
+ counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
+ [ref0 != GOLDEN_FRAME]++;
+ }
}
}
}
@@ -1072,17 +1078,18 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
} else {
// Note how often each mode chosen as best
cpi->mode_chosen_counts[mb_mode_index]++;
- if (is_inter_block(mbmi) &&
- (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
- int_mv best_mv[2];
- for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
- best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
- vp9_update_mv_count(cpi, x, best_mv);
- }
+ if (is_inter_block(mbmi)) {
+ if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
+ int_mv best_mv[2];
+ for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+ best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+ vp9_update_mv_count(cpi, x, best_mv);
+ }
- if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
- const int ctx = vp9_get_pred_context_switchable_interp(xd);
- ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ if (cm->interp_filter == SWITCHABLE) {
+ const int ctx = vp9_get_pred_context_switchable_interp(xd);
+ ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+ }
}
}
}
@@ -2181,108 +2188,6 @@ static void switch_tx_mode(VP9_COMP *cpi) {
cpi->common.tx_mode = ALLOW_32X32;
}
-static void encode_frame_internal(VP9_COMP *cpi) {
- int mi_row;
- MACROBLOCK *const x = &cpi->mb;
- VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
-
-// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
-// cpi->common.current_video_frame, cpi->common.show_frame,
-// cm->frame_type);
-
- vp9_zero(cm->counts.switchable_interp);
- vp9_zero(cpi->tx_stepdown_count);
-
- xd->mi_8x8 = cm->mi_grid_visible;
- // required for vp9_frame_init_quantizer
- xd->mi_8x8[0] = cm->mi;
-
- xd->last_mi = cm->prev_mi;
-
- vp9_zero(cm->counts.mv);
- vp9_zero(cpi->coef_counts);
- vp9_zero(cm->counts.eob_branch);
-
- cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
- && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
- switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
-
- vp9_frame_init_quantizer(cpi);
-
- vp9_initialize_rd_consts(cpi);
- vp9_initialize_me_consts(cpi, cm->base_qindex);
- switch_tx_mode(cpi);
-
- if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
- // Initialize encode frame context.
- init_encode_frame_mb_context(cpi);
-
- // Build a frame level activity map
- build_activity_map(cpi);
- }
-
- // Re-initialize encode frame context.
- init_encode_frame_mb_context(cpi);
-
- vp9_zero(cpi->rd_comp_pred_diff);
- vp9_zero(cpi->rd_filter_diff);
- vp9_zero(cpi->rd_tx_select_diff);
- vp9_zero(cpi->rd_tx_select_threshes);
-
- set_prev_mi(cm);
-
- {
- struct vpx_usec_timer emr_timer;
- vpx_usec_timer_start(&emr_timer);
-
- {
- // Take tiles into account and give start/end MB
- int tile_col, tile_row;
- TOKENEXTRA *tp = cpi->tok;
- const int tile_cols = 1 << cm->log2_tile_cols;
- const int tile_rows = 1 << cm->log2_tile_rows;
-
- for (tile_row = 0; tile_row < tile_rows; tile_row++) {
- for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- TileInfo tile;
- TOKENEXTRA *tp_old = tp;
-
- // For each row of SBs in the frame
- vp9_tile_init(&tile, cm, tile_row, tile_col);
- for (mi_row = tile.mi_row_start;
- mi_row < tile.mi_row_end; mi_row += 8)
- encode_sb_row(cpi, &tile, mi_row, &tp);
-
- cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
- assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
- }
- }
- }
-
- vpx_usec_timer_mark(&emr_timer);
- cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
- }
-
- if (cpi->sf.skip_encode_sb) {
- int j;
- unsigned int intra_count = 0, inter_count = 0;
- for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
- intra_count += cm->counts.intra_inter[j][0];
- inter_count += cm->counts.intra_inter[j][1];
- }
- cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);
- cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
- cpi->sf.skip_encode_frame &= cm->show_frame;
- } else {
- cpi->sf.skip_encode_frame = 0;
- }
-
-#if 0
- // Keep record of the total distortion this time around for future use
- cpi->last_frame_distortion = cpi->frame_distortion;
-#endif
-}
static int check_dual_ref_flags(VP9_COMP *cpi) {
const int ref_flags = cpi->ref_frame_flags;
@@ -2572,28 +2477,18 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
&dummy_rate, &dummy_dist, 1);
}
}
+// end RTC play code
-
-static void encode_rtc_frame_internal(VP9_COMP *cpi) {
+static void encode_frame_internal(VP9_COMP *cpi) {
int mi_row;
- MACROBLOCK * const x = &cpi->mb;
- VP9_COMMON * const cm = &cpi->common;
- MACROBLOCKD * const xd = &x->e_mbd;
+ MACROBLOCK *const x = &cpi->mb;
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
// fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
// cpi->common.current_video_frame, cpi->common.show_frame,
// cm->frame_type);
-// debug output
-#if DBG_PRNT_SEGMAP
- {
- FILE *statsfile;
- statsfile = fopen("segmap2.stt", "a");
- fprintf(statsfile, "\n");
- fclose(statsfile);
- }
-#endif
-
vp9_zero(cm->counts.switchable_interp);
vp9_zero(cpi->tx_stepdown_count);
@@ -2603,7 +2498,7 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
xd->last_mi = cm->prev_mi;
- vp9_zero(cpi->common.counts.mv);
+ vp9_zero(cm->counts.mv);
vp9_zero(cpi->coef_counts);
vp9_zero(cm->counts.eob_branch);
@@ -2616,7 +2511,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
vp9_initialize_rd_consts(cpi);
vp9_initialize_me_consts(cpi, cm->base_qindex);
switch_tx_mode(cpi);
- cpi->sf.always_this_block_size = BLOCK_16X16;
if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
// Initialize encode frame context.
@@ -2655,9 +2549,12 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
// For each row of SBs in the frame
vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start;
- mi_row < tile.mi_row_end; mi_row += 8)
- encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
-
+ mi_row < tile.mi_row_end; mi_row += 8) {
+ if (cpi->sf.super_fast_rtc)
+ encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
+ else
+ encode_sb_row(cpi, &tile, mi_row, &tp);
+ }
cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
}
@@ -2687,8 +2584,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
-// end RTC play code
-
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -2763,10 +2658,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
select_tx_mode(cpi);
cm->reference_mode = reference_mode;
- if (cpi->sf.super_fast_rtc)
- encode_rtc_frame_internal(cpi);
- else
- encode_frame_internal(cpi);
+ encode_frame_internal(cpi);
for (i = 0; i < REFERENCE_MODES; ++i) {
const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
@@ -2846,10 +2738,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
} else {
// Force the usage of the BILINEAR interp_filter.
cm->interp_filter = BILINEAR;
- if (cpi->sf.super_fast_rtc)
- encode_rtc_frame_internal(cpi);
- else
- encode_frame_internal(cpi);
+ encode_frame_internal(cpi);
}
}
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 376a899e0..c7507c13b 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -19,7 +19,6 @@
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_systemdependent.h"
-#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rdopt.h"
@@ -571,7 +570,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
if (!x->skip_recode) {
vp9_subtract_block(16, 16, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride);
- vp9_fht16x16(tx_type, src_diff, coeff, diff_stride);
+ vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
@@ -591,7 +590,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
if (!x->skip_recode) {
vp9_subtract_block(8, 8, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride);
- vp9_fht8x8(tx_type, src_diff, coeff, diff_stride);
+ vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan_order->scan,
@@ -617,7 +616,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_subtract_block(4, 4, src_diff, diff_stride,
src, p->src.stride, dst, pd->dst.stride);
if (tx_type != DCT_DCT)
- vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type);
+ vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, diff_stride);
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 838f74e8c..dc35044d6 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -902,8 +902,8 @@ static double calc_correction_factor(double err_per_mb,
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
-static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
- int section_target_bandwitdh) {
+int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+ int section_target_bandwitdh) {
int q;
const int num_mbs = cpi->common.MBs;
int target_norm_bits_per_mb;
@@ -2280,8 +2280,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
// Special case code for first frame.
const int section_target_bandwidth = (int)(twopass->bits_left /
frames_left);
- const int tmp_q = estimate_max_q(cpi, &twopass->total_left_stats,
- section_target_bandwidth);
+ const int tmp_q = vp9_twopass_worst_quality(cpi, &twopass->total_left_stats,
+ section_target_bandwidth);
rc->active_worst_quality = tmp_q;
rc->ni_av_qi = tmp_q;
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 19b59815a..7e612183e 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -10,20 +10,86 @@
#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
#define VP9_ENCODER_VP9_FIRSTPASS_H_
-#include "vp9/encoder/vp9_onyx_int.h"
#ifdef __cplusplus
extern "C" {
#endif
-void vp9_init_first_pass(VP9_COMP *cpi);
-void vp9_rc_get_first_pass_params(VP9_COMP *cpi);
-void vp9_first_pass(VP9_COMP *cpi);
-void vp9_end_first_pass(VP9_COMP *cpi);
+typedef struct {
+ double frame;
+ double intra_error;
+ double coded_error;
+ double sr_coded_error;
+ double ssim_weighted_pred_err;
+ double pcnt_inter;
+ double pcnt_motion;
+ double pcnt_second_ref;
+ double pcnt_neutral;
+ double MVr;
+ double mvr_abs;
+ double MVc;
+ double mvc_abs;
+ double MVrv;
+ double MVcv;
+ double mv_in_out_count;
+ double new_mv_count;
+ double duration;
+ double count;
+} FIRSTPASS_STATS;
-void vp9_init_second_pass(VP9_COMP *cpi);
-void vp9_rc_get_second_pass_params(VP9_COMP *cpi);
-void vp9_end_second_pass(VP9_COMP *cpi);
+struct twopass_rc {
+ unsigned int section_intra_rating;
+ unsigned int next_iiratio;
+ unsigned int this_iiratio;
+ FIRSTPASS_STATS total_stats;
+ FIRSTPASS_STATS this_frame_stats;
+ FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
+ FIRSTPASS_STATS total_left_stats;
+ int first_pass_done;
+ int64_t bits_left;
+ int64_t clip_bits_total;
+ double avg_iiratio;
+ double modified_error_min;
+ double modified_error_max;
+ double modified_error_total;
+ double modified_error_left;
+ double kf_intra_err_min;
+ double gf_intra_err_min;
+ int static_scene_max_gf_interval;
+ int kf_bits;
+ // Remaining error from uncoded frames in a gf group. Two pass use only
+ int64_t gf_group_error_left;
+
+ // Projected total bits available for a key frame group of frames
+ int64_t kf_group_bits;
+
+ // Error score of frames still to be coded in kf group
+ int64_t kf_group_error_left;
+
+ // Projected Bits available for a group of frames including 1 GF or ARF
+ int64_t gf_group_bits;
+ // Bits for the golden frame or ARF - 2 pass only
+ int gf_bits;
+ int alt_extra_bits;
+
+ int sr_update_lag;
+
+ int kf_zeromotion_pct;
+ int gf_zeromotion_pct;
+};
+
+struct VP9_COMP;
+
+void vp9_init_first_pass(struct VP9_COMP *cpi);
+void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi);
+void vp9_first_pass(struct VP9_COMP *cpi);
+void vp9_end_first_pass(struct VP9_COMP *cpi);
+
+void vp9_init_second_pass(struct VP9_COMP *cpi);
+void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
+void vp9_end_second_pass(struct VP9_COMP *cpi);
+int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+ int section_target_bandwitdh);
// Post encode update of the rate control parameters for 2-pass
void vp9_twopass_postencode_update(struct VP9_COMP *cpi,
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index e6e59c05a..4b642e2b6 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -11,9 +11,12 @@
#include <stdlib.h>
#include "./vpx_config.h"
+
#include "vp9/common/vp9_common.h"
+
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/encoder/vp9_onyx_int.h"
struct lookahead_ctx {
unsigned int max_sz; /* Absolute size of the queue */
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 36591bd42..198e11cc2 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -475,11 +475,9 @@ static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
((col + range) <= x->mv_col_max);
}
-static INLINE int check_point(const MACROBLOCK *x, const MV *mv) {
- return (mv->col < x->mv_col_min) |
- (mv->col > x->mv_col_max) |
- (mv->row < x->mv_row_min) |
- (mv->row > x->mv_row_max);
+static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
+ return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
+ (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
}
#define CHECK_BETTER \
@@ -572,7 +570,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
for (i = 0; i < num_candidates[t]; i++) {
this_mv.row = br + candidates[t][i].row;
this_mv.col = bc + candidates[t][i].col;
- if (check_point(x, &this_mv))
+ if (!is_mv_in(x, &this_mv))
continue;
this_offset = base_offset + (this_mv.row * in_what_stride) +
this_mv.col;
@@ -616,7 +614,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
for (i = 0; i < num_candidates[s]; i++) {
this_mv.row = br + candidates[s][i].row;
this_mv.col = bc + candidates[s][i].col;
- if (check_point(x, &this_mv))
+ if (!is_mv_in(x, &this_mv))
continue;
this_offset = base_offset + (this_mv.row * in_what_stride) +
this_mv.col;
@@ -656,7 +654,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
- if (check_point(x, &this_mv))
+ if (!is_mv_in(x, &this_mv))
continue;
this_offset = base_offset + (this_mv.row * (in_what_stride)) +
this_mv.col;
@@ -695,7 +693,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
for (i = 0; i < 4; i++) {
this_mv.row = br + neighbors[i].row;
this_mv.col = bc + neighbors[i].col;
- if (check_point(x, &this_mv))
+ if (!is_mv_in(x, &this_mv))
continue;
this_offset = base_offset + this_mv.row * in_what_stride +
this_mv.col;
@@ -1685,10 +1683,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
this_mv.row = ref_mv->row + neighbors[j].row;
this_mv.col = ref_mv->col + neighbors[j].col;
- if ((this_mv.col > x->mv_col_min) &&
- (this_mv.col < x->mv_col_max) &&
- (this_mv.row > x->mv_row_min) &&
- (this_mv.row < x->mv_row_max)) {
+ if (is_mv_in(x, &this_mv)) {
const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
this_mv.col];
thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
@@ -1875,10 +1870,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
this_mv.row = ref_mv->row + neighbors[j].row;
this_mv.col = ref_mv->col + neighbors[j].col;
- if ((this_mv.col > x->mv_col_min) &&
- (this_mv.col < x->mv_col_max) &&
- (this_mv.row > x->mv_row_min) &&
- (this_mv.row < x->mv_row_max)) {
+ if (is_mv_in(x, &this_mv)) {
const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
this_mv.col];
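
One behavioral detail worth noting: the inline refining-search checks replaced here used strict comparisons, while is_mv_in() is inclusive, so motion vectors lying exactly on the search limits are now accepted in those loops. A side-by-side sketch of the two predicates, copied from the hunks above (same MACROBLOCK bound fields):

  /* Old refining-search bound check (strict; rejects vectors on the limit):
   *   (mv.col > x->mv_col_min) && (mv.col < x->mv_col_max) &&
   *   (mv.row > x->mv_row_min) && (mv.row < x->mv_row_max)
   * New shared predicate (inclusive; accepts vectors on the limit):
   *   (mv.col >= x->mv_col_min) && (mv.col <= x->mv_col_max) &&
   *   (mv.row >= x->mv_row_min) && (mv.row <= x->mv_row_max)
   * Call sites therefore flip from "if (check_point(...)) continue;" to
   * "if (!is_mv_in(...)) continue;" as shown above. */
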
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 2f1973793..58e58530d 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -572,7 +572,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
int speed) {
int i;
sf->adaptive_rd_thresh = 1;
- sf->recode_loop = (speed < 1);
+ sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
if (speed == 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
@@ -590,7 +590,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->adaptive_pred_interp_filter = 1;
sf->auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
- sf->recode_loop = 2;
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
@@ -626,7 +626,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
sf->last_partitioning_redo_frequency = 3;
sf->adaptive_rd_thresh = 2;
- sf->recode_loop = 2;
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->use_lp32x32fdct = 1;
sf->mode_skip_start = 11;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
@@ -743,7 +743,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
int speed) {
sf->static_segmentation = 0;
sf->adaptive_rd_thresh = 1;
- sf->recode_loop = (speed < 1);
+ sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
if (speed == 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
@@ -761,7 +761,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->adaptive_pred_interp_filter = 1;
sf->auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
- sf->recode_loop = 2;
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
@@ -797,7 +797,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->last_partitioning_redo_frequency = 3;
sf->adaptive_rd_thresh = 2;
- sf->recode_loop = 2;
+ sf->recode_loop = ALLOW_RECODE_KFARFGF;
sf->use_lp32x32fdct = 1;
sf->mode_skip_start = 11;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
@@ -846,6 +846,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
}
if (speed >= 6) {
sf->super_fast_rtc = 1;
+ sf->always_this_block_size = BLOCK_16X16;
}
}
@@ -865,7 +866,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
// best quality defaults
sf->RD = 1;
sf->search_method = NSTEP;
- sf->recode_loop = 1;
+ sf->recode_loop = ALLOW_RECODE;
sf->subpel_search_method = SUBPEL_TREE;
sf->subpel_iters_per_step = 2;
sf->subpel_force_stop = 0;
@@ -933,7 +934,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
// No recode for 1 pass.
if (cpi->pass == 0) {
- sf->recode_loop = 0;
+ sf->recode_loop = DISALLOW_RECODE;
sf->optimize_coefficients = 0;
}
@@ -1144,6 +1145,123 @@ static int64_t rescale(int val, int64_t num, int denom) {
return (llval * llnum / llden);
}
+// Initialize layer context data from init_config().
+static void init_layer_context(VP9_COMP *const cpi) {
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ int temporal_layer = 0;
+ cpi->svc.spatial_layer_id = 0;
+ cpi->svc.temporal_layer_id = 0;
+ for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
+ ++temporal_layer) {
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->active_worst_quality = q_trans[oxcf->worst_allowed_q];
+ lrc->avg_frame_qindex[INTER_FRAME] = lrc->active_worst_quality;
+ lrc->last_q[INTER_FRAME] = lrc->active_worst_quality;
+ lrc->ni_av_qi = lrc->active_worst_quality;
+ lrc->total_actual_bits = 0;
+ lrc->total_target_vs_actual = 0;
+ lrc->ni_tot_qi = 0;
+ lrc->tot_q = 0.0;
+ lrc->avg_q = 0.0;
+ lrc->ni_frames = 0;
+ lrc->decimation_count = 0;
+ lrc->decimation_factor = 0;
+ lrc->rate_correction_factor = 1.0;
+ lrc->key_frame_rate_correction_factor = 1.0;
+ lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] *
+ 1000;
+ lrc->buffer_level = rescale((int)(oxcf->starting_buffer_level),
+ lc->target_bandwidth, 1000);
+ lrc->bits_off_target = lrc->buffer_level;
+ }
+}
+
+// Update the layer context from a change_config() call.
+static void update_layer_context_change_config(VP9_COMP *const cpi,
+ const int target_bandwidth) {
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ int temporal_layer = 0;
+ float bitrate_alloc = 1.0;
+ for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
+ ++temporal_layer) {
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000;
+ bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth;
+ // Update buffer-related quantities.
+ lc->starting_buffer_level = oxcf->starting_buffer_level * bitrate_alloc;
+ lc->optimal_buffer_level = oxcf->optimal_buffer_level * bitrate_alloc;
+ lc->maximum_buffer_size = oxcf->maximum_buffer_size * bitrate_alloc;
+ lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
+ lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size);
+ // Update framerate-related quantities.
+ lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+ lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+ lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
+ // Update qp-related quantities.
+ lrc->worst_quality = rc->worst_quality;
+ lrc->best_quality = rc->best_quality;
+ lrc->active_worst_quality = rc->active_worst_quality;
+ }
+}
+
+// Prior to encoding the frame, update framerate-related quantities
+// for the current layer.
+static void update_layer_framerate(VP9_COMP *const cpi) {
+ int temporal_layer = cpi->svc.temporal_layer_id;
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+ lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+ lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
+ // Update the average layer frame size (non-cumulative per-frame-bw).
+ if (temporal_layer == 0) {
+ lc->avg_frame_size = lrc->av_per_frame_bandwidth;
+ } else {
+ double prev_layer_framerate = oxcf->framerate /
+ oxcf->ts_rate_decimator[temporal_layer - 1];
+ int prev_layer_target_bandwidth =
+ oxcf->ts_target_bitrate[temporal_layer - 1] * 1000;
+ lc->avg_frame_size =
+ (int)(lc->target_bandwidth - prev_layer_target_bandwidth) /
+ (lc->framerate - prev_layer_framerate);
+ }
+}
+
+// Prior to encoding the frame, set the layer context, for the current layer
+// to be encoded, to the cpi struct.
+static void restore_layer_context(VP9_COMP *const cpi) {
+ int temporal_layer = cpi->svc.temporal_layer_id;
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+ int frame_since_key = cpi->rc.frames_since_key;
+ int frame_to_key = cpi->rc.frames_to_key;
+ cpi->rc = lc->rc;
+ cpi->oxcf.target_bandwidth = lc->target_bandwidth;
+ cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
+ cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
+ cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size;
+ cpi->output_framerate = lc->framerate;
+ // Reset the frames_since_key and frames_to_key counters to their values
+ // before the layer restore. Keep these defined for the stream (not layer).
+ cpi->rc.frames_since_key = frame_since_key;
+ cpi->rc.frames_to_key = frame_to_key;
+}
+
+// Save the layer context after encoding the frame.
+static void save_layer_context(VP9_COMP *const cpi) {
+ int temporal_layer = cpi->svc.temporal_layer_id;
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+ lc->rc = cpi->rc;
+ lc->target_bandwidth = cpi->oxcf.target_bandwidth;
+ lc->starting_buffer_level = cpi->oxcf.starting_buffer_level;
+ lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level;
+ lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size;
+ lc->framerate = cpi->output_framerate;
+}
+
static void set_tile_limits(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -1170,6 +1288,16 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cm->subsampling_y = 0;
vp9_alloc_compressor_data(cpi);
+ // Spatial scalability.
+ cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
+ // Temporal scalability.
+ cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
+
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ init_layer_context(cpi);
+ }
+
// change includes all joint functionality
vp9_change_config(ptr, oxcf);
@@ -1210,9 +1338,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->gld_fb_idx = 1;
cpi->alt_fb_idx = 2;
- cpi->current_layer = 0;
- cpi->use_svc = 0;
-
set_tile_limits(cpi);
cpi->fixed_divide[0] = 0;
@@ -1220,7 +1345,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->fixed_divide[i] = 0x80000 / i;
}
-
void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
VP9_COMP *cpi = (VP9_COMP *)(ptr);
VP9_COMMON *const cm = &cpi->common;
@@ -1312,10 +1436,10 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
cpi->oxcf.target_bandwidth, 1000);
// Under a configuration change, where maximum_buffer_size may change,
// keep buffer level clipped to the maximum allowed buffer size.
- if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size) {
- cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size;
- cpi->rc.buffer_level = cpi->rc.bits_off_target;
- }
+ cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target,
+ cpi->oxcf.maximum_buffer_size);
+ cpi->rc.buffer_level = MIN(cpi->rc.buffer_level,
+ cpi->oxcf.maximum_buffer_size);
// Set up frame rate and related parameters rate control values.
vp9_new_framerate(cpi, cpi->oxcf.framerate);
@@ -1350,6 +1474,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
}
update_frame_size(cpi);
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ update_layer_context_change_config(cpi, cpi->oxcf.target_bandwidth);
+ }
+
cpi->speed = cpi->oxcf.cpu_used;
if (cpi->oxcf.lag_in_frames == 0) {
@@ -1573,6 +1702,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
vp9_create_common(cm);
+ cpi->use_svc = 0;
+
init_config((VP9_PTR)cpi, oxcf);
init_pick_mode_context(cpi);
@@ -1588,9 +1719,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
cpi->alt_is_last = 0;
cpi->gold_is_alt = 0;
- // Spatial scalability
- cpi->number_spatial_layers = oxcf->ss_number_layers;
-
// Create the encoder segmentation map and set all entries to 0
CHECK_MEM_ERROR(cm, cpi->segmentation_map,
vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
@@ -1616,11 +1744,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
}
-#ifdef ENTROPY_STATS
- if (cpi->pass != 1)
- init_context_counters();
-#endif
-
/*Initialize the feed-forward activity masking.*/
cpi->activity_avg = 90 << 12;
cpi->key_frame_frequency = cpi->oxcf.key_freq;
@@ -2431,8 +2554,8 @@ static int recode_loop_test(const VP9_COMP *cpi,
// Is frame recode allowed.
// Yes if either recode mode 1 is selected or mode 2 is selected
// and the frame is a key frame, golden frame or alt_ref_frame
- } else if ((cpi->sf.recode_loop == 1) ||
- ((cpi->sf.recode_loop == 2) &&
+ } else if ((cpi->sf.recode_loop == ALLOW_RECODE) ||
+ ((cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF) &&
(cm->frame_type == KEY_FRAME ||
cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
// General over and under shoot tests
@@ -2651,25 +2774,67 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
}
#endif
+static void encode_without_recode_loop(VP9_COMP *cpi,
+ size_t *size,
+ uint8_t *dest,
+ int q) {
+ VP9_COMMON *const cm = &cpi->common;
+ vp9_clear_system_state(); // __asm emms;
+ vp9_set_quantizer(cpi, q);
+
+ // Set up entropy context depending on frame type. The decoder mandates
+ // the use of the default context, index 0, for keyframes and inter
+ // frames where the error_resilient_mode or intra_only flag is set. For
+ // other inter-frames the encoder currently uses only two contexts;
+ // context 1 for ALTREF frames and context 0 for the others.
+ if (cm->frame_type == KEY_FRAME) {
+ vp9_setup_key_frame(cpi);
+ } else {
+ if (!cm->intra_only && !cm->error_resilient_mode) {
+ cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
+ }
+ vp9_setup_inter_frame(cpi);
+ }
+ // Variance adaptive and in frame q adjustment experiments are mutually
+ // exclusive.
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+ vp9_vaq_frame_setup(cpi);
+ } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+ setup_in_frame_q_adj(cpi);
+ }
+ // transform / motion compensation build reconstruction frame
+ vp9_encode_frame(cpi);
+
+ // Update the skip mb flag probabilities based on the distribution
+ // seen in the last encoder iteration.
+ // update_base_skip_probs(cpi);
+ vp9_clear_system_state(); // __asm emms;
+}
+
static void encode_with_recode_loop(VP9_COMP *cpi,
size_t *size,
uint8_t *dest,
- int *q,
+ int q,
int bottom_index,
- int top_index,
- int frame_over_shoot_limit,
- int frame_under_shoot_limit) {
+ int top_index) {
VP9_COMMON *const cm = &cpi->common;
int loop_count = 0;
int loop = 0;
int overshoot_seen = 0;
int undershoot_seen = 0;
int q_low = bottom_index, q_high = top_index;
+ int frame_over_shoot_limit;
+ int frame_under_shoot_limit;
+
+ // Decide frame size bounds
+ vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
+ &frame_under_shoot_limit,
+ &frame_over_shoot_limit);
do {
vp9_clear_system_state(); // __asm emms;
- vp9_set_quantizer(cpi, *q);
+ vp9_set_quantizer(cpi, q);
if (loop_count == 0) {
// Set up entropy context depending on frame type. The decoder mandates
@@ -2696,7 +2861,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
}
// transform / motion compensation build reconstruction frame
-
vp9_encode_frame(cpi);
// Update the skip mb flag probabilities based on the distribution
@@ -2708,7 +2872,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
// Dummy pack of the bitstream using up to date stats to get an
// accurate estimate of output frame size to determine if we need
// to recode.
- if (cpi->sf.recode_loop != 0) {
+ if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
vp9_save_coding_context(cpi);
cpi->dummy_packing = 1;
if (!cpi->sf.super_fast_rtc)
@@ -2727,7 +2891,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
if ((cm->frame_type == KEY_FRAME) &&
cpi->rc.this_key_frame_forced &&
(cpi->rc.projected_frame_size < cpi->rc.max_frame_bandwidth)) {
- int last_q = *q;
+ int last_q = q;
int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
int high_err_target = cpi->ambient_err;
@@ -2743,32 +2907,32 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
(kf_err > low_err_target &&
cpi->rc.projected_frame_size <= frame_under_shoot_limit)) {
// Lower q_high
- q_high = *q > q_low ? *q - 1 : q_low;
+ q_high = q > q_low ? q - 1 : q_low;
// Adjust Q
- *q = ((*q) * high_err_target) / kf_err;
- *q = MIN((*q), (q_high + q_low) >> 1);
+ q = (q * high_err_target) / kf_err;
+ q = MIN(q, (q_high + q_low) >> 1);
} else if (kf_err < low_err_target &&
cpi->rc.projected_frame_size >= frame_under_shoot_limit) {
// The key frame is much better than the previous frame
// Raise q_low
- q_low = *q < q_high ? *q + 1 : q_high;
+ q_low = q < q_high ? q + 1 : q_high;
// Adjust Q
- *q = ((*q) * low_err_target) / kf_err;
- *q = MIN((*q), (q_high + q_low + 1) >> 1);
+ q = (q * low_err_target) / kf_err;
+ q = MIN(q, (q_high + q_low + 1) >> 1);
}
// Clamp Q to upper and lower limits:
- *q = clamp(*q, q_low, q_high);
+ q = clamp(q, q_low, q_high);
- loop = *q != last_q;
+ loop = q != last_q;
} else if (recode_loop_test(
cpi, frame_over_shoot_limit, frame_under_shoot_limit,
- *q, MAX(q_high, top_index), bottom_index)) {
+ q, MAX(q_high, top_index), bottom_index)) {
// Is the projected frame size out of range and are we allowed
// to attempt to recode.
- int last_q = *q;
+ int last_q = q;
int retries = 0;
// Frame size out of permitted range:
@@ -2781,23 +2945,23 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
q_high = cpi->rc.worst_quality;
// Raise Qlow as to at least the current value
- q_low = *q < q_high ? *q + 1 : q_high;
+ q_low = q < q_high ? q + 1 : q_high;
if (undershoot_seen || loop_count > 1) {
// Update rate_correction_factor unless
vp9_rc_update_rate_correction_factors(cpi, 1);
- *q = (q_high + q_low + 1) / 2;
+ q = (q_high + q_low + 1) / 2;
} else {
// Update rate_correction_factor unless
vp9_rc_update_rate_correction_factors(cpi, 0);
- *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+ q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
bottom_index, MAX(q_high, top_index));
- while (*q < q_low && retries < 10) {
+ while (q < q_low && retries < 10) {
vp9_rc_update_rate_correction_factors(cpi, 0);
- *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+ q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
bottom_index, MAX(q_high, top_index));
retries++;
}
@@ -2806,27 +2970,27 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
overshoot_seen = 1;
} else {
// Frame is too small
- q_high = *q > q_low ? *q - 1 : q_low;
+ q_high = q > q_low ? q - 1 : q_low;
if (overshoot_seen || loop_count > 1) {
vp9_rc_update_rate_correction_factors(cpi, 1);
- *q = (q_high + q_low) / 2;
+ q = (q_high + q_low) / 2;
} else {
vp9_rc_update_rate_correction_factors(cpi, 0);
- *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+ q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
bottom_index, top_index);
// Special case reset for qlow for constrained quality.
// This should only trigger where there is very substantial
// undershoot on a frame and the auto cq level is above
// the user passsed in value.
if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
- *q < q_low) {
- q_low = *q;
+ q < q_low) {
+ q_low = q;
}
- while (*q > q_high && retries < 10) {
+ while (q > q_high && retries < 10) {
vp9_rc_update_rate_correction_factors(cpi, 0);
- *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+ q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
bottom_index, top_index);
retries++;
}
@@ -2836,9 +3000,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
}
// Clamp Q to upper and lower limits:
- *q = clamp(*q, q_low, q_high);
+ q = clamp(q, q_low, q_high);
- loop = *q != last_q;
+ loop = q != last_q;
} else {
loop = 0;
}
@@ -2911,20 +3075,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
TX_SIZE t;
int q;
- int frame_over_shoot_limit;
- int frame_under_shoot_limit;
int top_index;
int bottom_index;
SPEED_FEATURES *const sf = &cpi->sf;
- unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height);
+ unsigned int max_mv_def = MIN(cm->width, cm->height);
struct segmentation *const seg = &cm->seg;
set_ext_overrides(cpi);
/* Scale the source buffer, if required. */
- if (cm->mi_cols * 8 != cpi->un_scaled_source->y_width ||
- cm->mi_rows * 8 != cpi->un_scaled_source->y_height) {
+ if (cm->mi_cols * MI_SIZE != cpi->un_scaled_source->y_width ||
+ cm->mi_rows * MI_SIZE != cpi->un_scaled_source->y_height) {
scale_and_extend_frame_nonnormative(cpi->un_scaled_source,
&cpi->scaled_source);
cpi->Source = &cpi->scaled_source;
@@ -2933,12 +3095,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
scale_references(cpi);
- // Clear down mmx registers to allow floating point in what follows.
vp9_clear_system_state();
- // Clear zbin over-quant value and mode boost values.
- cpi->zbin_mode_boost = 0;
-
// Enable or disable mode based tweaking of the zbin.
// For 2 pass only used where GF/ARF prediction quality
// is above a threshold.
@@ -2946,7 +3104,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->zbin_mode_boost_enabled = 0;
// Current default encoder behavior for the altref sign bias.
- cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active;
+ cm->ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active;
// Set default state for segment based loop filter update flags.
cm->lf.mode_ref_delta_update = 0;
@@ -2955,7 +3113,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->mv_step_param = vp9_init_search_range(cpi, max_mv_def);
// Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate.
if (sf->auto_mv_step_size) {
- if (frame_is_intra_only(&cpi->common)) {
+ if (frame_is_intra_only(cm)) {
// Initialize max_mv_magnitude for use in the first INTER frame
// after a key/intra-only frame.
cpi->max_mv_magnitude = max_mv_def;
@@ -2964,8 +3122,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Allow mv_steps to correspond to twice the max mv magnitude found
// in the previous frame, capped by the default max_mv_magnitude based
// on resolution.
- cpi->mv_step_param = vp9_init_search_range(
- cpi, MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+ cpi->mv_step_param = vp9_init_search_range(cpi, MIN(max_mv_def, 2 *
+ cpi->max_mv_magnitude));
cpi->max_mv_magnitude = 0;
}
}
@@ -3002,9 +3160,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// static regions if indicated.
// Only allowed in second pass of two pass (as requires lagged coding)
// and if the relevant speed feature flag is set.
- if ((cpi->pass == 2) && (cpi->sf.static_segmentation)) {
+ if (cpi->pass == 2 && cpi->sf.static_segmentation)
configure_static_seg_features(cpi);
- }
// For 1 pass CBR, check if we are dropping this frame.
// Never drop on key frame.
@@ -3013,7 +3170,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cm->frame_type != KEY_FRAME) {
if (vp9_rc_drop_frame(cpi)) {
vp9_rc_postencode_update_drop_frame(cpi);
- cm->current_video_frame++;
+ ++cm->current_video_frame;
return;
}
}
@@ -3051,41 +3208,22 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_write_yuv_frame(cpi->Source);
#endif
- // Decide frame size bounds
- vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
- &frame_under_shoot_limit,
- &frame_over_shoot_limit);
-
// Decide q and q bounds.
q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
&bottom_index,
&top_index);
- // JBB : This is realtime mode. In real time mode the first frame
- // should be larger. Q of 0 is disabled because we force tx size to be
- // 16x16...
- if (cpi->sf.super_fast_rtc) {
- if (cpi->common.current_video_frame == 0)
- q /= 3;
-
- if (q == 0)
- q++;
- }
-
if (!frame_is_intra_only(cm)) {
cm->interp_filter = DEFAULT_INTERP_FILTER;
/* TODO: Decide this more intelligently */
set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH));
}
- encode_with_recode_loop(cpi,
- size,
- dest,
- &q,
- bottom_index,
- top_index,
- frame_over_shoot_limit,
- frame_under_shoot_limit);
+ if (cpi->sf.recode_loop == DISALLOW_RECODE) {
+ encode_without_recode_loop(cpi, size, dest, q);
+ } else {
+ encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index);
+ }
// Special case code to reduce pulsing when key frames are forced at a
// fixed interval. Note the reconstruction error if it is the frame before
@@ -3132,37 +3270,30 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
update_reference_frames(cpi);
for (t = TX_4X4; t <= TX_32X32; t++)
- full_to_model_counts(cpi->common.counts.coef[t],
- cpi->coef_counts[t]);
- if (!cpi->common.error_resilient_mode &&
- !cpi->common.frame_parallel_decoding_mode) {
- vp9_adapt_coef_probs(&cpi->common);
- }
-
- if (!frame_is_intra_only(&cpi->common)) {
- if (!cpi->common.error_resilient_mode &&
- !cpi->common.frame_parallel_decoding_mode) {
- vp9_adapt_mode_probs(&cpi->common);
- vp9_adapt_mv_probs(&cpi->common, cpi->common.allow_high_precision_mv);
+ full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]);
+
+ if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode)
+ vp9_adapt_coef_probs(cm);
+
+ if (!frame_is_intra_only(cm)) {
+ if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
+ vp9_adapt_mode_probs(cm);
+ vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
}
}
-#ifdef ENTROPY_STATS
- vp9_update_mode_context_stats(cpi);
-#endif
-
#if 0
output_frame_level_debug_stats(cpi);
#endif
if (cpi->refresh_golden_frame == 1)
- cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN;
+ cm->frame_flags |= FRAMEFLAGS_GOLDEN;
else
- cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_GOLDEN;
+ cm->frame_flags &= ~FRAMEFLAGS_GOLDEN;
if (cpi->refresh_alt_ref_frame == 1)
- cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF;
+ cm->frame_flags |= FRAMEFLAGS_ALTREF;
else
- cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF;
+ cm->frame_flags &= ~FRAMEFLAGS_ALTREF;
get_ref_frame_flags(cpi);
@@ -3211,6 +3342,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// reset to normal state now that we are done.
if (!cm->show_existing_frame)
cm->last_show_frame = cm->show_frame;
+
if (cm->show_frame) {
// current mip will be the prev_mip for the next frame
MODE_INFO *temp = cm->prev_mip;
@@ -3231,6 +3363,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// update not a real frame
++cm->current_video_frame;
}
+
// restore prev_mi
cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1;
@@ -3516,6 +3649,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
adjust_frame_rate(cpi);
}
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ update_layer_framerate(cpi);
+ restore_layer_context(cpi);
+ }
+
// start with a 0 size frame
*size = 0;
@@ -3591,6 +3730,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
cpi->droppable = !frame_is_reference(cpi);
}
+ // Save layer specific state.
+ if (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ save_layer_context(cpi);
+ }
+
vpx_usec_timer_mark(&cmptimer);
cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
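
Putting the new layer-context helpers and the vp9_get_compressed_data() hunks together, the per-frame flow for CBR temporal layers is roughly the following. This is a sketch of the call ordering only; encode_one_frame() is a placeholder for the rest of the compressed-data path, not a real function:

  /* Sketch: applies when cpi->svc.number_temporal_layers > 1 and
   * cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER, per the hunks above. */
  if (cpi->svc.number_temporal_layers > 1 &&
      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
    update_layer_framerate(cpi);   /* per-layer framerate / avg frame size */
    restore_layer_context(cpi);    /* load this layer's RATE_CONTROL state */
  }

  encode_one_frame(cpi);           /* placeholder for the actual encode */

  if (cpi->svc.number_temporal_layers > 1 &&
      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
    save_layer_context(cpi);       /* write updated rc back to the layer */
  }
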
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index d0ca962db..90ed606f6 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -23,6 +23,7 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_lookahead.h"
#include "vp9/encoder/vp9_mbgraph.h"
#include "vp9/encoder/vp9_mcomp.h"
@@ -44,8 +45,9 @@ extern "C" {
#else
#define MIN_GF_INTERVAL 4
#endif
-#define DEFAULT_GF_INTERVAL 7
+#define DEFAULT_GF_INTERVAL 10
#define DEFAULT_KF_BOOST 2000
+#define DEFAULT_GF_BOOST 2000
#define KEY_FRAME_CONTEXT 5
@@ -78,28 +80,6 @@ typedef struct {
FRAME_CONTEXT fc;
} CODING_CONTEXT;
-typedef struct {
- double frame;
- double intra_error;
- double coded_error;
- double sr_coded_error;
- double ssim_weighted_pred_err;
- double pcnt_inter;
- double pcnt_motion;
- double pcnt_second_ref;
- double pcnt_neutral;
- double MVr;
- double mvr_abs;
- double MVc;
- double mvc_abs;
- double MVrv;
- double MVcv;
- double mv_in_out_count;
- double new_mv_count;
- double duration;
- double count;
-} FIRSTPASS_STATS;
-
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
typedef enum {
@@ -217,6 +197,17 @@ typedef enum {
LAST_FRAME_PARTITION_ALL = 2
} LAST_FRAME_PARTITION_METHOD;
+typedef enum {
+ // No recode.
+ DISALLOW_RECODE = 0,
+ // Allow recode for KF and exceeding maximum frame bandwidth.
+ ALLOW_RECODE_KFMAXBW = 1,
+ // Allow recode only for KF/ARF/GF frames.
+ ALLOW_RECODE_KFARFGF = 2,
+ // Allow recode for all frames based on bitrate constraints.
+ ALLOW_RECODE = 3,
+} RECODE_LOOP_TYPE;
+
typedef struct {
// This flag refers to whether or not to perform rd optimization.
int RD;
@@ -224,11 +215,7 @@ typedef struct {
// Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
SEARCH_METHODS search_method;
- // Recode_loop can be:
- // 0 means we only encode a frame once
- // 1 means we can re-encode based on bitrate constraints on any frame
- // 2 means we can only recode gold, alt, and key frames.
- int recode_loop;
+ RECODE_LOOP_TYPE recode_loop;
// Subpel_search_method can only be subpel_tree which does a subpixel
// logarithmic search that keeps stepping at 1/2 pixel units until
@@ -407,6 +394,16 @@ typedef struct {
int super_fast_rtc;
} SPEED_FEATURES;
+typedef struct {
+ RATE_CONTROL rc;
+ int target_bandwidth;
+ int64_t starting_buffer_level;
+ int64_t optimal_buffer_level;
+ int64_t maximum_buffer_size;
+ double framerate;
+ int avg_frame_size;
+} LAYER_CONTEXT;
+
typedef struct VP9_COMP {
DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
@@ -451,9 +448,6 @@ typedef struct VP9_COMP {
int gld_fb_idx;
int alt_fb_idx;
- int current_layer;
- int use_svc;
-
#if CONFIG_MULTIPLE_ARF
int alt_ref_fb_idx[REF_FRAMES - 3];
#endif
@@ -573,46 +567,7 @@ typedef struct VP9_COMP {
uint64_t time_pick_lpf;
uint64_t time_encode_sb_row;
- struct twopass_rc {
- unsigned int section_intra_rating;
- unsigned int next_iiratio;
- unsigned int this_iiratio;
- FIRSTPASS_STATS total_stats;
- FIRSTPASS_STATS this_frame_stats;
- FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
- FIRSTPASS_STATS total_left_stats;
- int first_pass_done;
- int64_t bits_left;
- int64_t clip_bits_total;
- double avg_iiratio;
- double modified_error_min;
- double modified_error_max;
- double modified_error_total;
- double modified_error_left;
- double kf_intra_err_min;
- double gf_intra_err_min;
- int static_scene_max_gf_interval;
- int kf_bits;
- // Remaining error from uncoded frames in a gf group. Two pass use only
- int64_t gf_group_error_left;
-
- // Projected total bits available for a key frame group of frames
- int64_t kf_group_bits;
-
- // Error score of frames still to be coded in kf group
- int64_t kf_group_error_left;
-
- // Projected Bits available for a group of frames including 1 GF or ARF
- int64_t gf_group_bits;
- // Bits for the golden frame or ARF - 2 pass only
- int gf_bits;
- int alt_extra_bits;
-
- int sr_update_lag;
-
- int kf_zeromotion_pct;
- int gf_zeromotion_pct;
- } twopass;
+ struct twopass_rc twopass;
YV12_BUFFER_CONFIG alt_ref_buffer;
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
@@ -669,7 +624,18 @@ typedef struct VP9_COMP {
int initial_width;
int initial_height;
- int number_spatial_layers;
+ int use_svc;
+
+ struct svc {
+ int spatial_layer_id;
+ int temporal_layer_id;
+ int number_spatial_layers;
+ int number_temporal_layers;
+ // Layer context used for rate control in CBR mode, only defined for
+ // temporal layers for now.
+ LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS];
+ } svc;
+
int enable_encode_breakout; // Default value is 1. From first pass stats,
// encode_breakout may be disabled.
@@ -726,8 +692,6 @@ void vp9_encode_frame(VP9_COMP *cpi);
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
-
void vp9_set_speed_features(VP9_COMP *cpi);
int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
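For orientation, a hypothetical sketch (not the actual initializer) of how the new per-temporal-layer contexts might be filled for CBR: target_bandwidth is treated as cumulative over layers 0..i and avg_frame_size as the per-layer (non-cumulative) bits per frame, matching the comment in vp9_ratectrl.c below; the dyadic frame-rate split and the helper name are assumptions.

#include "vp9/encoder/vp9_onyx_int.h"

static void init_layer_contexts_sketch(VP9_COMP *cpi,
                                       const int layer_bitrates_kbps[],
                                       double framerate) {
  int i;
  int64_t cumulative_bw = 0;
  for (i = 0; i < cpi->svc.number_temporal_layers; ++i) {
    LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i];
    const int64_t prev_bw = cumulative_bw;
    const double prev_fr = i ? cpi->svc.layer_context[i - 1].framerate : 0.0;
    cumulative_bw += (int64_t)layer_bitrates_kbps[i] * 1000;
    lc->target_bandwidth = (int)cumulative_bw;
    /* Assumed dyadic pattern: each additional layer doubles the frame rate. */
    lc->framerate =
        framerate / (1 << (cpi->svc.number_temporal_layers - 1 - i));
    /* Per-layer average frame size = extra bits / extra frames per second. */
    lc->avg_frame_size =
        (int)((lc->target_bandwidth - prev_bw) / (lc->framerate - prev_fr));
  }
}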
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index a2eea1cd7..862573f3f 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -79,55 +79,47 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *dequant_ptr,
int zbin_oq_value, uint16_t *eob_ptr,
const int16_t *scan, const int16_t *iscan) {
- int i, rc, eob;
- int zbins[2], nzbins[2];
- int x, y, z, sz;
+ const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1),
+ ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) };
+ const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+
int idx = 0;
int idx_arr[1024];
+ int i, eob = -1;
- vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
- vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
-
- eob = -1;
-
- // Base ZBIN
- zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1);
- zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1);
- nzbins[0] = zbins[0] * -1;
- nzbins[1] = zbins[1] * -1;
+ vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+ vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
if (!skip_block) {
// Pre-scan pass
for (i = 0; i < n_coeffs; i++) {
- rc = scan[i];
- z = coeff_ptr[rc];
+ const int rc = scan[i];
+ const int coeff = coeff_ptr[rc];
// If the coefficient is out of the base ZBIN range, keep it for
// quantization.
- if (z >= zbins[rc != 0] || z <= nzbins[rc != 0])
+ if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
idx_arr[idx++] = i;
}
// Quantization pass: only process the coefficients selected in
// pre-scan pass. Note: idx can be zero.
for (i = 0; i < idx; i++) {
- rc = scan[idx_arr[i]];
-
- z = coeff_ptr[rc];
- sz = (z >> 31); // sign of z
- x = (z ^ sz) - sz; // x = abs(z)
-
- x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
- x = clamp(x, INT16_MIN, INT16_MAX);
- y = ((((x * quant_ptr[rc != 0]) >> 16) + x) *
- quant_shift_ptr[rc != 0]) >> 15; // quantize (x)
-
- x = (y ^ sz) - sz; // get the sign back
- qcoeff_ptr[rc] = x; // write to destination
- dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / 2; // dequantized value
-
- if (y)
- eob = idx_arr[i]; // last nonzero coeffs
+ const int rc = scan[idx_arr[i]];
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ int tmp;
+ int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+ abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+ tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
+ quant_shift_ptr[rc != 0]) >> 15;
+
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+
+ if (tmp)
+ eob = idx_arr[i];
}
}
*eob_ptr = eob + 1;
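The rewritten quantization pass above uses the standard branch-free absolute-value / sign-restore idiom. A standalone sketch of a single coefficient, stripped of the zbin pre-scan and the 16-bit clamp (plain C, not tied to the encoder structs):

#include <stdint.h>

/* One coefficient of the 32x32 quantizer above. round is assumed to be
 * already halved (ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1)) and the
 * final /2 mirrors the extra scaling of the 32x32 transform. */
static void quantize_coeff_sketch(int coeff, int round, int quant,
                                  int quant_shift, int dequant,
                                  int16_t *qcoeff, int16_t *dqcoeff) {
  const int sign = coeff >> 31;           /* 0 for >= 0, -1 (all ones) for < 0 */
  int abs_coeff = (coeff ^ sign) - sign;  /* abs(coeff) without a branch */
  int tmp;
  abs_coeff += round;
  tmp = ((((abs_coeff * quant) >> 16) + abs_coeff) * quant_shift) >> 15;
  *qcoeff = (int16_t)((tmp ^ sign) - sign);      /* re-apply the sign */
  *dqcoeff = (int16_t)(*qcoeff * dequant / 2);
}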
@@ -136,8 +128,8 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan) {
MACROBLOCKD *const xd = &x->e_mbd;
- struct macroblock_plane* p = &x->plane[plane];
- struct macroblockd_plane* pd = &xd->plane[plane];
+ struct macroblock_plane *p = &x->plane[plane];
+ struct macroblockd_plane *pd = &xd->plane[plane];
vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
16, x->skip_block,
@@ -223,38 +215,30 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
}
void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
- int i;
- VP9_COMMON *const cm = &cpi->common;
+ const VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
- int zbin_extra;
- int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
- const int qindex = vp9_get_qindex(&cpi->common.seg, segment_id,
- cpi->common.base_qindex);
-
- int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
+ const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
+ const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+ const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
+ const int zbin = cpi->zbin_mode_boost + x->act_zbin_adj;
+ int i;
// Y
- zbin_extra = (cpi->common.y_dequant[qindex][1] *
- (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7;
-
x->plane[0].quant = cpi->y_quant[qindex];
x->plane[0].quant_shift = cpi->y_quant_shift[qindex];
x->plane[0].zbin = cpi->y_zbin[qindex];
x->plane[0].round = cpi->y_round[qindex];
- x->plane[0].zbin_extra = (int16_t)zbin_extra;
- x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex];
+ x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7);
+ xd->plane[0].dequant = cm->y_dequant[qindex];
// UV
- zbin_extra = (cpi->common.uv_dequant[qindex][1] *
- (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7;
-
for (i = 1; i < 3; i++) {
x->plane[i].quant = cpi->uv_quant[qindex];
x->plane[i].quant_shift = cpi->uv_quant_shift[qindex];
x->plane[i].zbin = cpi->uv_zbin[qindex];
x->plane[i].round = cpi->uv_round[qindex];
- x->plane[i].zbin_extra = (int16_t)zbin_extra;
- x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex];
+ x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7);
+ xd->plane[i].dequant = cm->uv_dequant[qindex];
}
#if CONFIG_ALPHA
@@ -263,18 +247,14 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
x->plane[3].zbin = cpi->a_zbin[qindex];
x->plane[3].round = cpi->a_round[qindex];
x->plane[3].zbin_extra = (int16_t)zbin_extra;
- x->e_mbd.plane[3].dequant = cpi->common.a_dequant[qindex];
+ xd->plane[3].dequant = cm->a_dequant[qindex];
#endif
- x->skip_block = vp9_segfeature_active(&cpi->common.seg, segment_id,
- SEG_LVL_SKIP);
-
- /* save this macroblock QIndex for vp9_update_zbin_extra() */
+ x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
- /* R/D setup */
- cpi->mb.errorperbit = rdmult >> 6;
- cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
+ x->errorperbit = rdmult >> 6;
+ x->errorperbit += (x->errorperbit == 0);
vp9_initialize_me_consts(cpi, x->q_index);
}
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index abbf39b81..b3e9f4538 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -215,12 +215,12 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
rc->av_per_frame_bandwidth >> 5);
if (target < min_frame_target)
target = min_frame_target;
- if (cpi->refresh_golden_frame && rc->source_alt_ref_active) {
+ if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) {
// If there is an active ARF at this location use the minimum
// bits on this frame even if it is a constructed arf.
    // The active maximum quantizer ensures that an appropriate
// number of bits will be spent if needed for constructed ARFs.
- target = 0;
+ target = min_frame_target;
}
// Clip the frame target to the maximum allowed value.
if (target > rc->max_frame_bandwidth)
@@ -241,6 +241,26 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) {
return target;
}
+
+// Update the buffer level for higher layers, given the encoded current layer.
+static void update_layer_buffer_level(VP9_COMP *const cpi,
+ int encoded_frame_size) {
+ int temporal_layer = 0;
+ int current_temporal_layer = cpi->svc.temporal_layer_id;
+ for (temporal_layer = current_temporal_layer + 1;
+ temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) {
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
+ encoded_frame_size);
+ lrc->bits_off_target += bits_off_for_this_layer;
+
+ // Clip buffer level to maximum buffer size for the layer.
+ lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
+ lrc->buffer_level = lrc->bits_off_target;
+ }
+}
+
// Update the buffer level: leaky bucket model.
static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
const VP9_COMMON *const cm = &cpi->common;
@@ -255,14 +275,18 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
}
// Clip the buffer level to the maximum specified buffer size.
- rc->buffer_level = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
+ rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
+ rc->buffer_level = rc->bits_off_target;
+
+ if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ update_layer_buffer_level(cpi, encoded_frame_size);
+ }
}
int vp9_rc_drop_frame(VP9_COMP *cpi) {
const VP9_CONFIG *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
-
if (!oxcf->drop_frames_water_mark) {
return 0;
} else {
@@ -273,7 +297,7 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
// If buffer is below drop_mark, for now just drop every other frame
// (starting with the next frame) until it increases back over drop_mark.
int drop_mark = (int)(oxcf->drop_frames_water_mark *
- oxcf->optimal_buffer_level / 100);
+ oxcf->optimal_buffer_level / 100);
if ((rc->buffer_level > drop_mark) &&
(rc->decimation_factor > 0)) {
--rc->decimation_factor;
@@ -301,7 +325,8 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) {
if (cpi->common.frame_type == KEY_FRAME) {
return cpi->rc.key_frame_rate_correction_factor;
} else {
- if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
+ if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+ !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
return cpi->rc.gf_rate_correction_factor;
else
return cpi->rc.rate_correction_factor;
@@ -312,7 +337,8 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
if (cpi->common.frame_type == KEY_FRAME) {
cpi->rc.key_frame_rate_correction_factor = factor;
} else {
- if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
+ if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+ !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
cpi->rc.gf_rate_correction_factor = factor;
else
cpi->rc.rate_correction_factor = factor;
@@ -461,8 +487,7 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
double q_adj_factor = 1.0;
double q_val;
- // Baseline value derived from cpi->active_worst_quality and kf boost
- active_best_quality = get_active_quality(active_worst_quality,
+ active_best_quality = get_active_quality(rc->avg_frame_qindex[KEY_FRAME],
rc->kf_boost,
kf_low, kf_high,
kf_low_motion_minq,
@@ -495,7 +520,8 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
q = rc->avg_frame_qindex[INTER_FRAME];
} else {
- q = active_worst_quality;
+ q = (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ?
+ active_worst_quality : rc->avg_frame_qindex[KEY_FRAME];
}
    // For constrained quality don't allow Q less than the cq level
if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
@@ -538,7 +564,25 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
active_best_quality = cpi->cq_target_quality;
} else {
- active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+ // Use the lower of active_worst_quality and recent/average Q.
+ if (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+ if (cm->current_video_frame > 1) {
+ if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
+ active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+ else
+ active_best_quality = inter_minq[active_worst_quality];
+ } else {
+ if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality)
+ active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+ else
+ active_best_quality = inter_minq[active_worst_quality];
+ }
+ } else {
+ if (cm->current_video_frame > 1)
+ active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+ else
+ active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+ }
// For the constrained quality mode we don't want
// q to fall below the cq level.
if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
@@ -574,7 +618,6 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
*top_index = (active_worst_quality + active_best_quality) / 2;
}
#endif
-
if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
q = active_best_quality;
// Special case code to try and match quality with forced key frames
@@ -810,12 +853,28 @@ static int rc_pick_q_and_adjust_q_bounds_two_pass(const VP9_COMP *cpi,
int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
int *bottom_index,
int *top_index) {
+ int q;
if (cpi->pass == 0)
- return rc_pick_q_and_adjust_q_bounds_one_pass(
+ q = rc_pick_q_and_adjust_q_bounds_one_pass(
cpi, bottom_index, top_index);
else
- return rc_pick_q_and_adjust_q_bounds_two_pass(
+ q = rc_pick_q_and_adjust_q_bounds_two_pass(
cpi, bottom_index, top_index);
+
+ // JBB : This is realtime mode. In real time mode the first frame
+ // should be larger. Q of 0 is disabled because we force tx size to be
+ // 16x16...
+ if (cpi->sf.super_fast_rtc) {
+ if (cpi->common.current_video_frame == 0)
+ q /= 3;
+ if (q == 0)
+ q++;
+ if (q < *bottom_index)
+ *bottom_index = q;
+ else if (q > *top_index)
+ *top_index = q;
+ }
+ return q;
}
void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
@@ -918,7 +977,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->projected_frame_size = (bytes_used << 3);
// Post encode loop adjustment of Q prediction.
- vp9_rc_update_rate_correction_factors(cpi, (cpi->sf.recode_loop ||
+ vp9_rc_update_rate_correction_factors(
+ cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF ||
cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
// Keep a record of last Q and ambient average Q.
@@ -927,7 +987,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(
3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2);
} else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
+ !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) {
rc->last_q[2] = cm->base_qindex;
rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(
3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2);
@@ -1002,21 +1063,6 @@ void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
cpi->rc.frames_to_key--;
}
-void vp9_rc_get_svc_params(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- if ((cm->current_video_frame == 0) ||
- (cm->frame_flags & FRAMEFLAGS_KEY) ||
- (cpi->oxcf.auto_key && (cpi->rc.frames_since_key %
- cpi->key_frame_frequency == 0))) {
- cm->frame_type = KEY_FRAME;
- cpi->rc.source_alt_ref_active = 0;
- } else {
- cm->frame_type = INTER_FRAME;
- }
- cpi->rc.frames_till_gf_update_due = INT_MAX;
- cpi->rc.baseline_gf_interval = INT_MAX;
-}
-
static int test_for_kf_one_pass(VP9_COMP *cpi) {
// Placeholder function for auto key frame
return 0;
@@ -1025,62 +1071,88 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) {
#define USE_ALTREF_FOR_ONE_PASS 1
static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+ static const int af_ratio = 10;
const RATE_CONTROL *rc = &cpi->rc;
- int target = rc->av_per_frame_bandwidth;
- target = vp9_rc_clamp_pframe_target_size(cpi, target);
- return target;
+ int target;
+#if USE_ALTREF_FOR_ONE_PASS
+ target = (!rc->is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ?
+ (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval * af_ratio) /
+ (cpi->rc.baseline_gf_interval + af_ratio - 1) :
+ (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval) /
+ (cpi->rc.baseline_gf_interval + af_ratio - 1);
+#else
+ target = rc->av_per_frame_bandwidth;
+#endif
+ return vp9_rc_clamp_pframe_target_size(cpi, target);
}
static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+ static const int kf_ratio = 25;
const RATE_CONTROL *rc = &cpi->rc;
- int target = rc->av_per_frame_bandwidth * 8;
- target = vp9_rc_clamp_iframe_target_size(cpi, target);
- return target;
+ int target = rc->av_per_frame_bandwidth * kf_ratio;
+ return vp9_rc_clamp_iframe_target_size(cpi, target);
+}
+
+static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) {
+ int active_worst_quality;
+ if (cpi->common.frame_type == KEY_FRAME) {
+ if (cpi->common.current_video_frame == 0) {
+ active_worst_quality = cpi->rc.worst_quality;
+ } else {
+ // Choose active worst quality twice as large as the last q.
+ active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+ }
+ } else if (!cpi->rc.is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ if (cpi->common.current_video_frame == 1) {
+ active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 5 / 4;
+ } else {
+      // For GF/ARF frames, use the last inter-frame q without inflation.
+ active_worst_quality = cpi->rc.last_q[INTER_FRAME];
+ }
+ } else {
+ if (cpi->common.current_video_frame == 1) {
+ active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+ } else {
+ // Choose active worst quality twice as large as the last q.
+ active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2;
+ }
+ }
+ if (active_worst_quality > cpi->rc.worst_quality)
+ active_worst_quality = cpi->rc.worst_quality;
+ return active_worst_quality;
}
void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
int target;
if (!cpi->refresh_alt_ref_frame &&
(cm->current_video_frame == 0 ||
cm->frame_flags & FRAMEFLAGS_KEY ||
- cpi->rc.frames_to_key == 0 ||
+ rc->frames_to_key == 0 ||
(cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
cm->frame_type = KEY_FRAME;
- cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
- cpi->rc.frames_to_key == 0;
- cpi->rc.frames_to_key = cpi->key_frame_frequency;
- cpi->rc.kf_boost = DEFAULT_KF_BOOST;
- cpi->rc.source_alt_ref_active = 0;
- if (cm->current_video_frame == 0) {
- cpi->rc.active_worst_quality = cpi->rc.worst_quality;
- } else {
- // Choose active worst quality twice as large as the last q.
- cpi->rc.active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
- if (cpi->rc.active_worst_quality > cpi->rc.worst_quality)
- cpi->rc.active_worst_quality = cpi->rc.worst_quality;
- }
+ rc->this_key_frame_forced = cm->current_video_frame != 0 &&
+ rc->frames_to_key == 0;
+ rc->frames_to_key = cpi->key_frame_frequency;
+ rc->kf_boost = DEFAULT_KF_BOOST;
+ rc->source_alt_ref_active = 0;
} else {
cm->frame_type = INTER_FRAME;
- if (cm->current_video_frame == 1) {
- cpi->rc.active_worst_quality = cpi->rc.worst_quality;
- } else {
- // Choose active worst quality twice as large as the last q.
- cpi->rc.active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2;
- if (cpi->rc.active_worst_quality > cpi->rc.worst_quality)
- cpi->rc.active_worst_quality = cpi->rc.worst_quality;
- }
}
- if (cpi->rc.frames_till_gf_update_due == 0) {
- cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
- cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval;
+ if (rc->frames_till_gf_update_due == 0) {
+ rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
// NOTE: frames_till_gf_update_due must be <= frames_to_key.
- if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key)
- cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key;
+ if (rc->frames_till_gf_update_due > rc->frames_to_key)
+ rc->frames_till_gf_update_due = rc->frames_to_key;
cpi->refresh_golden_frame = 1;
- cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
- cpi->rc.gfu_boost = 2000;
+ rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
+ rc->gfu_boost = DEFAULT_GF_BOOST;
}
+ cpi->rc.active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
if (cm->frame_type == KEY_FRAME)
target = calc_iframe_target_size_one_pass_vbr(cpi);
else
@@ -1099,13 +1171,15 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
const RATE_CONTROL *rc = &cpi->rc;
int active_worst_quality = rc->active_worst_quality;
// Maximum limit for down adjustment, ~20%.
- int max_adjustment_down = active_worst_quality / 5;
// Buffer level below which we push active_worst to worst_quality.
int critical_level = oxcf->optimal_buffer_level >> 2;
int adjustment = 0;
int buff_lvl_step = 0;
+ if (cpi->common.frame_type == KEY_FRAME)
+ return rc->worst_quality;
if (rc->buffer_level > oxcf->optimal_buffer_level) {
// Adjust down.
+ int max_adjustment_down = active_worst_quality / 5;
if (max_adjustment_down) {
buff_lvl_step = (int)((oxcf->maximum_buffer_size -
oxcf->optimal_buffer_level) / max_adjustment_down);
@@ -1135,11 +1209,20 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
const VP9_CONFIG *oxcf = &cpi->oxcf;
const RATE_CONTROL *rc = &cpi->rc;
- int target = rc->av_per_frame_bandwidth;
- const int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
- FRAME_OVERHEAD_BITS);
const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
+ int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
+ FRAME_OVERHEAD_BITS);
+ int target = rc->av_per_frame_bandwidth;
+ if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ // Note that for layers, av_per_frame_bandwidth is the cumulative
+ // per-frame-bandwidth. For the target size of this frame, use the
+ // layer average frame size (i.e., non-cumulative per-frame-bw).
+ int current_temporal_layer = cpi->svc.temporal_layer_id;
+ const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer];
+ target = lc->avg_frame_size;
+ min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
+ }
if (diff > 0) {
// Lower the target bandwidth for this frame.
const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
@@ -1149,51 +1232,73 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
const int pct_high = MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
target += (target * pct_high) / 200;
}
- if (target < min_frame_target)
- target = min_frame_target;
- return target;
+ return MAX(min_frame_target, target);
}
static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
- int target;
const RATE_CONTROL *rc = &cpi->rc;
+
if (cpi->common.current_video_frame == 0) {
- target = cpi->oxcf.starting_buffer_level / 2;
+ return cpi->oxcf.starting_buffer_level / 2;
} else {
- int initial_boost = 32;
+ const int initial_boost = 32;
int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
if (rc->frames_since_key < cpi->output_framerate / 2) {
kf_boost = (int)(kf_boost * rc->frames_since_key /
(cpi->output_framerate / 2));
}
- target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
+ return ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
}
- return target;
+}
+
+void vp9_rc_get_svc_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ int target = cpi->rc.av_per_frame_bandwidth;
+ if ((cm->current_video_frame == 0) ||
+ (cm->frame_flags & FRAMEFLAGS_KEY) ||
+ (cpi->oxcf.auto_key && (cpi->rc.frames_since_key %
+ cpi->key_frame_frequency == 0))) {
+ cm->frame_type = KEY_FRAME;
+ cpi->rc.source_alt_ref_active = 0;
+ if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ target = calc_iframe_target_size_one_pass_cbr(cpi);
+ }
+ } else {
+ cm->frame_type = INTER_FRAME;
+ if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+ target = calc_pframe_target_size_one_pass_cbr(cpi);
+ }
+ }
+ cpi->rc.active_worst_quality =
+ calc_active_worst_quality_one_pass_cbr(cpi);
+ vp9_rc_set_frame_target(cpi, target);
+ cpi->rc.frames_till_gf_update_due = INT_MAX;
+ cpi->rc.baseline_gf_interval = INT_MAX;
}
void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
int target;
if ((cm->current_video_frame == 0 ||
cm->frame_flags & FRAMEFLAGS_KEY ||
- cpi->rc.frames_to_key == 0 ||
+ rc->frames_to_key == 0 ||
(cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
cm->frame_type = KEY_FRAME;
- cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
- cpi->rc.frames_to_key == 0;
- cpi->rc.frames_to_key = cpi->key_frame_frequency;
- cpi->rc.kf_boost = DEFAULT_KF_BOOST;
- cpi->rc.source_alt_ref_active = 0;
+ rc->this_key_frame_forced = cm->current_video_frame != 0 &&
+ rc->frames_to_key == 0;
+ rc->frames_to_key = cpi->key_frame_frequency;
+ rc->kf_boost = DEFAULT_KF_BOOST;
+ rc->source_alt_ref_active = 0;
target = calc_iframe_target_size_one_pass_cbr(cpi);
- cpi->rc.active_worst_quality = cpi->rc.worst_quality;
} else {
cm->frame_type = INTER_FRAME;
target = calc_pframe_target_size_one_pass_cbr(cpi);
- cpi->rc.active_worst_quality =
- calc_active_worst_quality_one_pass_cbr(cpi);
}
+ cpi->rc.active_worst_quality =
+ calc_active_worst_quality_one_pass_cbr(cpi);
vp9_rc_set_frame_target(cpi, target);
// Don't use gf_update by default in CBR mode.
- cpi->rc.frames_till_gf_update_due = INT_MAX;
- cpi->rc.baseline_gf_interval = INT_MAX;
+ rc->frames_till_gf_update_due = INT_MAX;
+ rc->baseline_gf_interval = INT_MAX;
}
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index e5230feb4..fcfab2a41 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1064,7 +1064,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
so = &vp9_scan_orders[TX_4X4][tx_type];
if (tx_type != DCT_DCT)
- vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
+ vp9_fht4x4(src_diff, coeff, 8, tx_type);
else
x->fwd_txm4x4(src_diff, coeff, 8);
@@ -1645,14 +1645,15 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi_buf, int filter_idx,
int_mv seg_mvs[4][MAX_REF_FRAMES],
int mi_row, int mi_col) {
- int i, br = 0, idx, idy;
+ int k, br = 0, idx, idy;
int64_t bd = 0, block_sse = 0;
MB_PREDICTION_MODE this_mode;
+ MACROBLOCKD *xd = &x->e_mbd;
VP9_COMMON *cm = &cpi->common;
- MODE_INFO *mi = x->e_mbd.mi_8x8[0];
+ MODE_INFO *mi = xd->mi_8x8[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
struct macroblock_plane *const p = &x->plane[0];
- struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
const int label_count = 4;
int64_t this_segment_rd = 0;
int label_mv_thresh;
@@ -1660,7 +1661,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
const BLOCK_SIZE bsize = mbmi->sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
- vp9_variance_fn_ptr_t *v_fn_ptr;
+ vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
ENTROPY_CONTEXT t_above[2], t_left[2];
BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
int mode_idx;
@@ -1670,8 +1671,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
- v_fn_ptr = &cpi->fn_ptr[bsize];
-
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
// segments. setting this to 1 would make mv thresh
@@ -1687,20 +1686,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
MB_PREDICTION_MODE mode_selected = ZEROMV;
int64_t best_rd = INT64_MAX;
- i = idy * 2 + idx;
-
- frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
- vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
- i, 0, mi_row, mi_col,
- &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
- &frame_mv[NEARMV][mbmi->ref_frame[0]]);
- if (has_second_rf) {
- frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
- vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
- i, 1, mi_row, mi_col,
- &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
- &frame_mv[NEARMV][mbmi->ref_frame[1]]);
+ const int i = idy * 2 + idx;
+ int ref;
+
+ for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+ const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+ frame_mv[ZEROMV][frame].as_int = 0;
+ vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
+ &frame_mv[NEARESTMV][frame],
+ &frame_mv[NEARMV][frame]);
}
+
// search for the best motion vector on this segment
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
const struct buf_2d orig_src = x->plane[0].src;
@@ -2042,8 +2038,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
bsi->sse = block_sse;
// update the coding decisions
- for (i = 0; i < 4; ++i)
- bsi->modes[i] = mi->bmi[i].as_mode;
+ for (k = 0; k < 4; ++k)
+ bsi->modes[k] = mi->bmi[k].as_mode;
}
static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad.c
index 58c5df47e..58c5df47e 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad.c
diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c
index ea031fb07..2b82d9750 100644
--- a/vp9/encoder/x86/vp9_dct_avx2.c
+++ b/vp9/encoder/x86/vp9_dct_avx2.c
@@ -244,32 +244,36 @@ void fadst4_avx2(__m128i *in) {
transpose_4x4_avx2(in);
}
-void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht4x4_avx2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in[4];
- load_buffer_4x4_avx2(input, in, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct4_avx2(in);
- fdct4_avx2(in);
+ case DCT_DCT:
+ vp9_fdct4x4_avx2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in);
fdct4_avx2(in);
+ write_buffer_4x4_avx2(output, in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_4x4_avx2(input, in, stride);
fdct4_avx2(in);
fadst4_avx2(in);
+ write_buffer_4x4_avx2(output, in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_4x4_avx2(input, in, stride);
fadst4_avx2(in);
fadst4_avx2(in);
+ write_buffer_4x4_avx2(output, in);
break;
default:
assert(0);
break;
}
- write_buffer_4x4_avx2(output, in);
}
void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
@@ -1028,33 +1032,39 @@ void fadst8_avx2(__m128i *in) {
array_transpose_8x8_avx2(in, in);
}
-void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht8x8_avx2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in[8];
- load_buffer_8x8_avx2(input, in, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct8_avx2(in);
- fdct8_avx2(in);
+ case DCT_DCT:
+ vp9_fdct8x8_avx2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_8x8_avx2(input, in, stride);
fadst8_avx2(in);
fdct8_avx2(in);
+ right_shift_8x8_avx2(in, 1);
+ write_buffer_8x8_avx2(output, in, 8);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_8x8_avx2(input, in, stride);
fdct8_avx2(in);
fadst8_avx2(in);
+ right_shift_8x8_avx2(in, 1);
+ write_buffer_8x8_avx2(output, in, 8);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_8x8_avx2(input, in, stride);
fadst8_avx2(in);
fadst8_avx2(in);
+ right_shift_8x8_avx2(in, 1);
+ write_buffer_8x8_avx2(output, in, 8);
break;
default:
assert(0);
break;
}
- right_shift_8x8_avx2(in, 1);
- write_buffer_8x8_avx2(output, in, 8);
}
void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
@@ -2534,36 +2544,39 @@ void fadst16_avx2(__m128i *in0, __m128i *in1) {
array_transpose_16x16_avx2(in0, in1);
}
-void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht16x16_avx2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in0[16], in1[16];
- load_buffer_16x16_avx2(input, in0, in1, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct16_avx2(in0, in1);
- right_shift_16x16_avx2(in0, in1);
- fdct16_avx2(in0, in1);
+ case DCT_DCT:
+ vp9_fdct16x16_avx2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_16x16_avx2(input, in0, in1, stride);
fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
fdct16_avx2(in0, in1);
+ write_buffer_16x16_avx2(output, in0, in1, 16);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_16x16_avx2(input, in0, in1, stride);
fdct16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
fadst16_avx2(in0, in1);
+ write_buffer_16x16_avx2(output, in0, in1, 16);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_16x16_avx2(input, in0, in1, stride);
fadst16_avx2(in0, in1);
right_shift_16x16_avx2(in0, in1);
fadst16_avx2(in0, in1);
+ write_buffer_16x16_avx2(output, in0, in1, 16);
break;
default:
assert(0);
break;
}
- write_buffer_16x16_avx2(output, in0, in1, 16);
}
#define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index c876cc273..852cf8667 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -242,32 +242,36 @@ void fadst4_sse2(__m128i *in) {
transpose_4x4(in);
}
-void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht4x4_sse2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in[4];
- load_buffer_4x4(input, in, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct4_sse2(in);
- fdct4_sse2(in);
+ case DCT_DCT:
+ vp9_fdct4x4_sse2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fdct4_sse2(in);
+ write_buffer_4x4(output, in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_4x4(input, in, stride);
fdct4_sse2(in);
fadst4_sse2(in);
+ write_buffer_4x4(output, in);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fadst4_sse2(in);
+ write_buffer_4x4(output, in);
break;
- default:
- assert(0);
- break;
+ default:
+ assert(0);
+ break;
}
- write_buffer_4x4(output, in);
}
void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
@@ -1026,33 +1030,39 @@ void fadst8_sse2(__m128i *in) {
array_transpose_8x8(in, in);
}
-void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht8x8_sse2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in[8];
- load_buffer_8x8(input, in, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct8_sse2(in);
- fdct8_sse2(in);
+ case DCT_DCT:
+ vp9_fdct8x8_sse2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fdct8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_8x8(input, in, stride);
fdct8_sse2(in);
fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fadst8_sse2(in);
+ right_shift_8x8(in, 1);
+ write_buffer_8x8(output, in, 8);
break;
default:
assert(0);
break;
}
- right_shift_8x8(in, 1);
- write_buffer_8x8(output, in, 8);
}
void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
@@ -2532,36 +2542,39 @@ void fadst16_sse2(__m128i *in0, __m128i *in1) {
array_transpose_16x16(in0, in1);
}
-void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output,
- int stride, int tx_type) {
+void vp9_fht16x16_sse2(const int16_t *input, int16_t *output,
+ int stride, int tx_type) {
__m128i in0[16], in1[16];
- load_buffer_16x16(input, in0, in1, stride);
+
switch (tx_type) {
- case 0: // DCT_DCT
- fdct16_sse2(in0, in1);
- right_shift_16x16(in0, in1);
- fdct16_sse2(in0, in1);
+ case DCT_DCT:
+ vp9_fdct16x16_sse2(input, output, stride);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
+ load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fdct16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
+ load_buffer_16x16(input, in0, in1, stride);
fdct16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
break;
- case 3: // ADST_ADST
+ case ADST_ADST:
+ load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
+ write_buffer_16x16(output, in0, in1, 16);
break;
default:
assert(0);
break;
}
- write_buffer_16x16(output, in0, in1, 16);
}
#define FDCT32x32_2D vp9_fdct32x32_rd_sse2
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index db306603b..48ccef8cc 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -188,7 +188,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
pmaxsw m8, m7
pshuflw m7, m8, 0x1
pmaxsw m8, m7
- pextrw [r2], m8, 0
+ pextrw r6, m8, 0
+ mov [r2], r6
RET
; skip-block, i.e. just write all zeroes
@@ -214,5 +215,5 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
%endmacro
INIT_XMM ssse3
-QUANTIZE_FN b, 6
+QUANTIZE_FN b, 7
QUANTIZE_FN b_32x32, 7
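One note on the eob store changed at the end of the quantize kernel: pextrw with a memory destination is an SSE4.1 encoding, while pextrw into a general-purpose register is plain SSE2, so extracting to r6 and then storing keeps the SSSE3-tagged routine within its declared instruction set; the extra scratch GPR is presumably why the register count for the 'b' variant rises from 6 to 7, matching the b_32x32 variant. A rough C-intrinsics picture of the two-instruction sequence, illustration only:

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>

/* Extract the low word of the horizontal-max result to a scalar and
 * store it to *eob_ptr, mirroring `pextrw r6, m8, 0` + `mov [r2], r6`. */
static void store_eob_sketch(uint16_t *eob_ptr, __m128i max_eob) {
  const int eob = _mm_extract_epi16(max_eob, 0);
  *eob_ptr = (uint16_t)eob;
}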