Diffstat (limited to 'vp9/encoder')
-rw-r--r--  vp9/encoder/vp9_block.h          |   6
-rw-r--r--  vp9/encoder/vp9_context_tree.h   |   1
-rw-r--r--  vp9/encoder/vp9_dct.c            |  44
-rw-r--r--  vp9/encoder/vp9_encodeframe.c    |  70
-rw-r--r--  vp9/encoder/vp9_encodemb.c       |  61
-rw-r--r--  vp9/encoder/vp9_encodemb.h       |   3
-rw-r--r--  vp9/encoder/vp9_encoder.c        |   4
-rw-r--r--  vp9/encoder/vp9_encoder.h        |   4
-rw-r--r--  vp9/encoder/vp9_firstpass.c      | 129
-rw-r--r--  vp9/encoder/vp9_firstpass.h      |   1
-rw-r--r--  vp9/encoder/vp9_mcomp.c          |  46
-rw-r--r--  vp9/encoder/vp9_mcomp.h          |   8
-rw-r--r--  vp9/encoder/vp9_pickmode.c       |  25
-rw-r--r--  vp9/encoder/vp9_quantize.c       |  50
-rw-r--r--  vp9/encoder/vp9_quantize.h       |  13
-rw-r--r--  vp9/encoder/vp9_ratectrl.c       |   5
-rw-r--r--  vp9/encoder/vp9_rdopt.c          |  81
-rw-r--r--  vp9/encoder/vp9_rdopt.h          |  46
-rw-r--r--  vp9/encoder/x86/vp9_dct_sse2.c   | 208
19 files changed, 633 insertions(+), 172 deletions(-)
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index c3cd93b78..2463ed0f4 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -28,6 +28,7 @@ struct macroblock_plane {
struct buf_2d src;
// Quantizer settings
+ int16_t *quant_fp;
int16_t *quant;
int16_t *quant_shift;
int16_t *zbin;
@@ -48,7 +49,7 @@ struct macroblock {
MACROBLOCKD e_mbd;
int skip_block;
- int select_txfm_size;
+ int select_tx_size;
int skip_recode;
int skip_optimize;
int q_index;
@@ -105,6 +106,9 @@ struct macroblock {
int use_lp32x32fdct;
int skip_encode;
+ // skip forward transform and quantization
+ int skip_txfm;
+
// Used to store sub partition's choices.
MV pred_mv[MAX_REF_FRAMES];
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
index bb384aacd..872223b75 100644
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -33,6 +33,7 @@ typedef struct {
int is_coded;
int num_4x4_blk;
int skip;
+ int skip_txfm;
int best_mode_index;
int hybrid_pred_diff;
int comp_pred_diff;
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 577276764..5c99a0a78 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -43,6 +43,17 @@ static void fdct4(const int16_t *input, int16_t *output) {
output[3] = fdct_round_shift(temp2);
}
+void vp9_fdct4x4_1_c(const int16_t *input, int16_t *output, int stride) {
+ int r, c;
+ int16_t sum = 0;
+ for (r = 0; r < 4; ++r)
+ for (c = 0; c < 4; ++c)
+ sum += input[r * stride + c];
+
+ output[0] = sum << 1;
+ output[1] = 0;
+}
+
void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
@@ -240,6 +251,17 @@ static void fdct8(const int16_t *input, int16_t *output) {
output[7] = fdct_round_shift(t3);
}
+void vp9_fdct8x8_1_c(const int16_t *input, int16_t *output, int stride) {
+ int r, c;
+ int16_t sum = 0;
+ for (r = 0; r < 8; ++r)
+ for (c = 0; c < 8; ++c)
+ sum += input[r * stride + c];
+
+ output[0] = sum;
+ output[1] = 0;
+}
+
void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
int i, j;
int16_t intermediate[64];
@@ -311,6 +333,17 @@ void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
}
}
+void vp9_fdct16x16_1_c(const int16_t *input, int16_t *output, int stride) {
+ int r, c;
+ int16_t sum = 0;
+ for (r = 0; r < 16; ++r)
+ for (c = 0; c < 16; ++c)
+ sum += input[r * stride + c];
+
+ output[0] = sum >> 1;
+ output[1] = 0;
+}
+
void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
@@ -1329,6 +1362,17 @@ static void fdct32(const int *input, int *output, int round) {
output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64);
}
+void vp9_fdct32x32_1_c(const int16_t *input, int16_t *output, int stride) {
+ int r, c;
+ int16_t sum = 0;
+ for (r = 0; r < 32; ++r)
+ for (c = 0; c < 32; ++c)
+ sum += input[r * stride + c];
+
+ output[0] = sum >> 3;
+ output[1] = 0;
+}
+
void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) {
int i, j;
int output[32 * 32];
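
Note on the _1 variants added above: each one produces only the DC coefficient, so its scale factor has to match the DC term of the corresponding full transform (sum << 1 for 4x4, sum for 8x8, sum >> 1 for 16x16 and sum >> 3 for 32x32, in agreement with the final shifts of the SSE2 kernels at the end of this change). A minimal, hypothetical sanity check, assuming the prototypes from vp9_dct.c, might look like:

    /* Hypothetical check (not part of this change): the fast DC-only
     * transform should track the DC term of the full transform up to the
     * fixed-point rounding of the two-pass implementation. */
    #include <stdint.h>
    #include <stdio.h>

    static void compare_dc_4x4(const int16_t *residual /* 4x4, stride 4 */) {
      int16_t full[4 * 4];
      int16_t dc_only[2];
      vp9_fdct4x4_c(residual, full, 4);       /* full 2D DCT */
      vp9_fdct4x4_1_c(residual, dc_only, 4);  /* DC only: sum << 1 */
      printf("full DC %d vs dc-only %d\n", full[0], dc_only[0]);
    }
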
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 87e0c08d2..001ac69bd 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -697,6 +697,38 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
x->e_mbd.plane[i].subsampling_y);
}
+static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, int *rate,
+ int64_t *dist, BLOCK_SIZE bsize) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+ INTERP_FILTER filter_ref;
+
+ if (xd->up_available)
+ filter_ref = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
+ else if (xd->left_available)
+ filter_ref = xd->mi[-1]->mbmi.interp_filter;
+ else
+ filter_ref = EIGHTTAP;
+
+ mbmi->sb_type = bsize;
+ mbmi->mode = ZEROMV;
+ mbmi->tx_size = MIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[tx_mode]);
+ mbmi->skip = 1;
+ mbmi->uv_mode = DC_PRED;
+ mbmi->ref_frame[0] = LAST_FRAME;
+ mbmi->ref_frame[1] = NONE;
+ mbmi->mv[0].as_int = 0;
+ mbmi->interp_filter = filter_ref;
+
+ xd->mi[0]->bmi[0].as_mv[0].as_int = 0;
+ x->skip = 1;
+ x->skip_encode = 1;
+
+ *rate = 0;
+ *dist = 0;
+}
+
static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, int mi_col,
int *totalrate, int64_t *totaldist,
@@ -1338,6 +1370,7 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
}
x->skip = ctx->skip;
+ x->skip_txfm = mbmi->segment_id ? 0 : ctx->skip_txfm;
}
static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile,
@@ -2364,15 +2397,15 @@ static int check_dual_ref_flags(VP9_COMP *cpi) {
}
}
-static void reset_skip_txfm_size(VP9_COMMON *cm, TX_SIZE txfm_max) {
+static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
int mi_row, mi_col;
const int mis = cm->mi_stride;
MODE_INFO **mi_ptr = cm->mi_grid_visible;
for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) {
for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
- if (mi_ptr[mi_col]->mbmi.tx_size > txfm_max)
- mi_ptr[mi_col]->mbmi.tx_size = txfm_max;
+ if (mi_ptr[mi_col]->mbmi.tx_size > max_tx_size)
+ mi_ptr[mi_col]->mbmi.tx_size = max_tx_size;
}
}
}
@@ -2441,17 +2474,21 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi;
set_offsets(cpi, tile, mi_row, mi_col, bsize);
- xd->mi[0]->mbmi.sb_type = bsize;
+ mbmi = &xd->mi[0]->mbmi;
+ mbmi->sb_type = bsize;
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
- if (xd->mi[0]->mbmi.segment_id && x->in_static_area)
+ if (mbmi->segment_id && x->in_static_area)
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
}
if (!frame_is_intra_only(cm)) {
- vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col,
- rate, dist, bsize);
+ if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ set_mode_info_seg_skip(x, cm->tx_mode, rate, dist, bsize);
+ else
+ vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize);
} else {
set_mode_info(&xd->mi[0]->mbmi, bsize, DC_PRED);
}
@@ -2577,6 +2614,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
&this_rate, &this_dist, bsize);
ctx->mic.mbmi = xd->mi[0]->mbmi;
+ ctx->skip_txfm = x->skip_txfm;
if (this_rate != INT_MAX) {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -2663,6 +2701,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
&this_rate, &this_dist, subsize);
pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[0].skip_txfm = x->skip_txfm;
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
@@ -2672,6 +2711,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
&this_rate, &this_dist, subsize);
pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[1].skip_txfm = x->skip_txfm;
if (this_rate == INT_MAX) {
sum_rd = INT64_MAX;
@@ -2701,12 +2741,14 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
&this_rate, &this_dist, subsize);
pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[0].skip_txfm = x->skip_txfm;
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
load_pred_mv(x, ctx);
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms,
&this_rate, &this_dist, subsize);
pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[1].skip_txfm = x->skip_txfm;
if (this_rate == INT_MAX) {
sum_rd = INT64_MAX;
} else {
@@ -2795,14 +2837,17 @@ static void nonrd_use_partition(VP9_COMP *cpi,
case PARTITION_NONE:
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->none.skip_txfm = x->skip_txfm;
break;
case PARTITION_VERT:
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[0].skip_txfm = x->skip_txfm;
if (mi_col + hbs < cm->mi_cols) {
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
&rate, &dist, subsize);
pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->vertical[1].skip_txfm = x->skip_txfm;
if (rate != INT_MAX && dist != INT64_MAX &&
*totrate != INT_MAX && *totdist != INT64_MAX) {
*totrate += rate;
@@ -2813,10 +2858,12 @@ static void nonrd_use_partition(VP9_COMP *cpi,
case PARTITION_HORZ:
nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[0].skip_txfm = x->skip_txfm;
if (mi_row + hbs < cm->mi_rows) {
nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
&rate, &dist, subsize);
pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
+ pc_tree->horizontal[1].skip_txfm = x->skip_txfm;
if (rate != INT_MAX && dist != INT64_MAX &&
*totrate != INT_MAX && *totdist != INT64_MAX) {
*totrate += rate;
@@ -3019,6 +3066,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
init_encode_frame_mb_context(cpi);
set_prev_mi(cm);
+ x->skip_txfm = 0;
if (sf->use_nonrd_pick_mode) {
// Initialize internal buffer pointers for rtc coding, where non-RD
// mode decision is used and hence no buffer pointer swap needed.
@@ -3207,16 +3255,16 @@ void vp9_encode_frame(VP9_COMP *cpi) {
if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
count32x32 == 0) {
cm->tx_mode = ALLOW_8X8;
- reset_skip_txfm_size(cm, TX_8X8);
+ reset_skip_tx_size(cm, TX_8X8);
} else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
cm->tx_mode = ONLY_4X4;
- reset_skip_txfm_size(cm, TX_4X4);
+ reset_skip_tx_size(cm, TX_4X4);
} else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
cm->tx_mode = ALLOW_32X32;
} else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
cm->tx_mode = ALLOW_16X16;
- reset_skip_txfm_size(cm, TX_16X16);
+ reset_skip_tx_size(cm, TX_16X16);
}
}
} else {
@@ -3277,7 +3325,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
- x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
+ x->skip_recode = !x->select_tx_size && mbmi->sb_type >= BLOCK_8X8 &&
cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
cpi->sf.allow_skip_recode;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 8581e6117..1c00698b5 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -301,6 +301,52 @@ static INLINE void fdct32x32(int rd_transform,
vp9_fdct32x32(src, dst, src_stride);
}
+void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
+ int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ uint16_t *const eob = &p->eobs[block];
+ const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
+ int i, j;
+ const int16_t *src_diff;
+
+ txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+ src_diff = &p->src_diff[4 * (j * diff_stride + i)];
+
+ switch (tx_size) {
+ case TX_32X32:
+ vp9_fdct32x32_1(src_diff, coeff, diff_stride);
+ vp9_quantize_dc_32x32(coeff, x->skip_block, p->round,
+ p->quant_fp[0], qcoeff, dqcoeff,
+ pd->dequant[0], eob);
+ break;
+ case TX_16X16:
+ vp9_fdct16x16_1(src_diff, coeff, diff_stride);
+ vp9_quantize_dc(coeff, x->skip_block, p->round,
+ p->quant_fp[0], qcoeff, dqcoeff,
+ pd->dequant[0], eob);
+ break;
+ case TX_8X8:
+ vp9_fdct8x8_1(src_diff, coeff, diff_stride);
+ vp9_quantize_dc(coeff, x->skip_block, p->round,
+ p->quant_fp[0], qcoeff, dqcoeff,
+ pd->dequant[0], eob);
+ break;
+ case TX_4X4:
+ x->fwd_txm4x4(src_diff, coeff, diff_stride);
+ vp9_quantize_dc(coeff, x->skip_block, p->round,
+ p->quant_fp[0], qcoeff, dqcoeff,
+ pd->dequant[0], eob);
+ break;
+ default:
+ assert(0);
+ }
+}
+
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -376,8 +422,19 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
return;
}
- if (!x->skip_recode)
- vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ if (x->skip_txfm == 0) {
+ // full forward transform and quantization
+ if (!x->skip_recode)
+ vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+ } else if (x->skip_txfm == 2) {
+ // fast path forward transform and quantization
+ vp9_xform_quant_fp(x, plane, block, plane_bsize, tx_size);
+ } else {
+ // skip forward transform
+ p->eobs[block] = 0;
+ *a = *l = 0;
+ return;
+ }
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
const int ctx = combine_entropy_contexts(*a, *l);
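
For readability, the three x->skip_txfm states dispatched in encode_block() above could be given names; this is a hypothetical sketch (the patch itself uses the bare values 0/1/2):

    /* Hypothetical constants for the x->skip_txfm values used above. */
    enum {
      SKIP_TXFM_NONE = 0,     /* full forward transform + quantization */
      SKIP_TXFM_AC_DC = 1,    /* skip everything: zero the block (eob = 0) */
      SKIP_TXFM_AC_ONLY = 2   /* skip the AC terms; vp9_xform_quant_fp()
                                 keeps only the DC coefficient */
    };
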
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 802145984..3196c9920 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -22,7 +22,8 @@ extern "C" {
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
-
+void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 6c0e0b58d..aa7a91dc1 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -588,8 +588,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
cpi->oxcf = *oxcf;
cpi->pass = get_pass(cpi->oxcf.mode);
- if (cpi->oxcf.mode == REALTIME)
- cpi->oxcf.play_alternate = 0;
rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
@@ -2416,7 +2414,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->refresh_alt_ref_frame = 0;
// Should we code an alternate reference frame.
- if (cpi->oxcf.play_alternate && rc->source_alt_ref_pending) {
+ if (is_altref_enabled(&cpi->oxcf) && rc->source_alt_ref_pending) {
int frames_to_arf;
#if CONFIG_MULTIPLE_ARF
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 7e8206119..6b0e228ca 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -284,6 +284,10 @@ typedef struct VP9EncoderConfig {
vp8e_tuning tuning;
} VP9EncoderConfig;
+static INLINE int is_altref_enabled(const VP9EncoderConfig *cfg) {
+ return cfg->mode != REALTIME && cfg->play_alternate && cfg->lag_in_frames > 0;
+}
+
static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
}
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index a49fe3df5..430f71394 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -137,14 +137,13 @@ static void output_stats(FIRSTPASS_STATS *stats,
FILE *fpfile;
fpfile = fopen("firstpass.stt", "a");
- fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.0f %12.4f %12.4f"
+ fprintf(fpfile, "%12.0f %12.0f %12.0f %12.0f %12.4f %12.4f"
"%12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f"
"%12.0f %12.0f %12.4f %12.0f %12.0f %12.4f\n",
stats->frame,
stats->intra_error,
stats->coded_error,
stats->sr_coded_error,
- stats->ssim_weighted_pred_err,
stats->pcnt_inter,
stats->pcnt_motion,
stats->pcnt_second_ref,
@@ -597,73 +596,91 @@ void vp9_first_pass(VP9_COMP *cpi) {
if (cm->current_video_frame > 0) {
int tmp_err, motion_error;
int_mv mv, tmp_mv;
+ int raw_motion_error;
+ struct buf_2d unscaled_last_source_buf_2d;
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
motion_error = get_prediction_error(bsize, &x->plane[0].src,
&xd->plane[0].pre[0]);
- // Assume 0,0 motion with no mv overhead.
- mv.as_int = tmp_mv.as_int = 0;
-
- // Test last reference frame using the previous best mv as the
- // starting point (best reference) for the search.
- first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
- &motion_error);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state();
- motion_error = (int)(motion_error * error_weight);
- }
- // If the current best reference mv is not centered on 0,0 then do a 0,0
- // based search as well.
- if (best_ref_mv.as_int) {
- tmp_err = INT_MAX;
- first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
- &tmp_err);
+ // Compute the motion error of the zero motion vector using the last
+ // source frame as the reference. Skip the further motion search on the
+ // reconstructed frame if this error is small.
+ unscaled_last_source_buf_2d.buf = cpi->unscaled_last_source->y_buffer
+ + recon_yoffset;
+ unscaled_last_source_buf_2d.stride =
+ cpi->unscaled_last_source->y_stride;
+ raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &unscaled_last_source_buf_2d);
+
+ // TODO(pengchong): Replace the hard-coded threshold
+ if (raw_motion_error > 25) {
+ // Assume 0,0 motion with no mv overhead.
+ mv.as_int = tmp_mv.as_int = 0;
+
+ // Test last reference frame using the previous best mv as the
+ // starting point (best reference) for the search.
+ first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
+ &motion_error);
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
- tmp_err = (int)(tmp_err * error_weight);
+ motion_error = (int)(motion_error * error_weight);
}
- if (tmp_err < motion_error) {
- motion_error = tmp_err;
- mv.as_int = tmp_mv.as_int;
+ // If the current best reference mv is not centered on 0,0 then do a
+ // 0,0 based search as well.
+ if (best_ref_mv.as_int) {
+ tmp_err = INT_MAX;
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+ &tmp_err);
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+ vp9_clear_system_state();
+ tmp_err = (int)(tmp_err * error_weight);
+ }
+
+ if (tmp_err < motion_error) {
+ motion_error = tmp_err;
+ mv.as_int = tmp_mv.as_int;
+ }
}
- }
- // Search in an older reference frame.
- if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
- // Assume 0,0 motion with no mv overhead.
- int gf_motion_error;
+ // Search in an older reference frame.
+ if (cm->current_video_frame > 1 && gld_yv12 != NULL) {
+ // Assume 0,0 motion with no mv overhead.
+ int gf_motion_error;
- xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
- gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
- &xd->plane[0].pre[0]);
+ xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
+ gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &xd->plane[0].pre[0]);
- first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
- &gf_motion_error);
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
- vp9_clear_system_state();
- gf_motion_error = (int)(gf_motion_error * error_weight);
- }
+ first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+ &gf_motion_error);
+ if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+ vp9_clear_system_state();
+ gf_motion_error = (int)(gf_motion_error * error_weight);
+ }
- if (gf_motion_error < motion_error && gf_motion_error < this_error)
- ++second_ref_count;
-
- // Reset to last frame as reference buffer.
- xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
- xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
- xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
-
- // In accumulating a score for the older reference frame take the
- // best of the motion predicted score and the intra coded error
- // (just as will be done for) accumulation of "coded_error" for
- // the last frame.
- if (gf_motion_error < this_error)
- sr_coded_error += gf_motion_error;
- else
- sr_coded_error += this_error;
- } else {
- sr_coded_error += motion_error;
+ if (gf_motion_error < motion_error && gf_motion_error < this_error)
+ ++second_ref_count;
+
+ // Reset to last frame as reference buffer.
+ xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
+ xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
+ xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;
+
+ // In accumulating a score for the older reference frame take the
+ // best of the motion predicted score and the intra coded error
+ // (just as will be done for) accumulation of "coded_error" for
+ // the last frame.
+ if (gf_motion_error < this_error)
+ sr_coded_error += gf_motion_error;
+ else
+ sr_coded_error += this_error;
+ } else {
+ sr_coded_error += motion_error;
+ }
}
// Start by assuming that intra mode is best.
best_ref_mv.as_int = 0;
@@ -1508,7 +1525,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
double mv_ratio_accumulator_thresh;
- unsigned int allow_alt_ref = oxcf->play_alternate && oxcf->lag_in_frames;
+ unsigned int allow_alt_ref = is_altref_enabled(oxcf);
int f_boost = 0;
int b_boost = 0;
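
The restructured first-pass motion-search block above is easier to read as a gate: the zero-mv error against the raw (unscaled) last source decides whether the motion searches run at all. A schematic restatement of the control flow only, with my own comments:

    /* Schematic of the gating added above (raw_motion_error is the zero-mv
     * prediction error against cpi->unscaled_last_source). */
    if (raw_motion_error > 25) {   /* hard-coded; see the TODO in the patch */
      /* 1. search the last frame starting from best_ref_mv             */
      /* 2. re-search from 0,0 when best_ref_mv is non-zero             */
      /* 3. optionally search the golden frame and accumulate           */
      /*    sr_coded_error from the better of the two errors            */
    }
    /* otherwise the block is treated as static and keeps the plain
     * zero-mv motion_error computed against the reconstructed frame */
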
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 309638c1e..82065213e 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -22,7 +22,6 @@ typedef struct {
double intra_error;
double coded_error;
double sr_coded_error;
- double ssim_weighted_pred_err;
double pcnt_inter;
double pcnt_motion;
double pcnt_second_ref;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index dbd19a2d6..48ac5f929 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1593,3 +1593,49 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
}
return best_sad;
}
+
+int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *mvp_full,
+ int step_param, int error_per_bit,
+ const MV *ref_mv, MV *tmp_mv,
+ int var_max, int rd) {
+ const SPEED_FEATURES *const sf = &cpi->sf;
+ const SEARCH_METHODS method = sf->search_method;
+ vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
+ int var = 0;
+
+ switch (method) {
+ case FAST_DIAMOND:
+ var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case FAST_HEX:
+ var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case HEX:
+ var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case SQUARE:
+ var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case BIGDIA:
+ var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
+ fn_ptr, 1, ref_mv, tmp_mv);
+ break;
+ case NSTEP:
+ var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
+ (sf->max_step_search_steps - 1) - step_param,
+ 1, fn_ptr, ref_mv, tmp_mv);
+ break;
+ default:
+ assert(!"Invalid search method.");
+ }
+
+ if (method != NSTEP && rd && var < var_max)
+ var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
+
+ return var;
+}
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 873edf376..07e410d3c 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -145,6 +145,14 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
int search_range,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *center_mv, const uint8_t *second_pred);
+
+struct VP9_COMP;
+
+int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, MV *mvp_full,
+ int step_param, int error_per_bit,
+ const MV *ref_mv, MV *tmp_mv,
+ int var_max, int rd);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index d76d601ff..4b0b85ad4 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -84,8 +84,8 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.col >>= 3;
mvp_full.row >>= 3;
- full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, &ref_mv,
- &tmp_mv->as_mv, INT_MAX, 0);
+ vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, &ref_mv,
+ &tmp_mv->as_mv, INT_MAX, 0);
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
@@ -156,24 +156,28 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
unsigned int sse;
int rate;
int64_t dist;
-
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
-
+ const int quant = pd->dequant[1];
unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, &sse);
-
*var_y = var;
*sse_y = sse;
+ if (sse < pd->dequant[0] * pd->dequant[0] >> 6)
+ x->skip_txfm = 1;
+ else if (var < quant * quant >> 6)
+ x->skip_txfm = 2;
+ else
+ x->skip_txfm = 0;
+
// TODO(jingning) This is a temporary solution to account for frames with
// light changes. Need to customize the rate-distortion modeling for non-RD
// mode decision.
if ((sse >> 3) > var)
sse = var;
-
vp9_model_rd_from_var_lapndz(var + sse, 1 << num_pels_log2_lookup[bsize],
- pd->dequant[1] >> 3, &rate, &dist);
+ quant >> 3, &rate, &dist);
*out_rate_sum = rate;
*out_dist_sum = dist << 3;
}
@@ -199,6 +203,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
VP9_ALT_FLAG };
int64_t best_rd = INT64_MAX;
int64_t this_rd = INT64_MAX;
+ int skip_txfm = 0;
int rate = INT_MAX;
int64_t dist = INT64_MAX;
@@ -341,6 +346,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cost < best_cost) {
best_filter = filter;
best_cost = cost;
+ skip_txfm = x->skip_txfm;
}
}
@@ -349,6 +355,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
dist = pf_dist[mbmi->interp_filter];
var_y = pf_var[mbmi->interp_filter];
sse_y = pf_sse[mbmi->interp_filter];
+ x->skip_txfm = skip_txfm;
} else {
mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
@@ -438,6 +445,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
best_mode = this_mode;
best_pred_filter = mbmi->interp_filter;
best_ref_frame = ref_frame;
+ skip_txfm = x->skip_txfm;
}
if (x->skip)
@@ -450,6 +458,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame[0] = best_ref_frame;
mbmi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
+ x->skip_txfm = skip_txfm;
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
@@ -474,6 +483,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->uv_mode = this_mode;
mbmi->mv[0].as_int = INVALID_MV;
+ } else {
+ x->skip_txfm = skip_txfm;
}
}
}
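
The thresholds added to model_rd_for_sb_y() above compare the block's SSE and variance against the squared dequantizer steps scaled by 1/64, i.e. (step/8)^2: when even the DC step dwarfs the total error, nothing is worth coding; when only the AC step does, the DC-only fast path suffices. A standalone restatement, under the assumption that dequant[0]/dequant[1] are the DC/AC step sizes:

    /* Hypothetical restatement of the x->skip_txfm decision above. */
    static int decide_skip_txfm(unsigned int sse, unsigned int var,
                                int dc_step, int ac_step) {
      if (sse < (unsigned int)(dc_step * dc_step >> 6))
        return 1;  /* error below the DC dead zone: code nothing */
      if (var < (unsigned int)(ac_step * ac_step >> 6))
        return 2;  /* AC energy below the AC dead zone: DC only */
      return 0;    /* run the full transform and quantization */
    }
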
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 4d3086d60..f817bcc31 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -19,6 +19,50 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rdopt.h"
+void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
+ const int16_t *round_ptr, const int16_t quant,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ int eob = -1;
+
+ if (!skip_block) {
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant) >> 16;
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
+ if (tmp)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+
+void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
+ const int16_t *round_ptr, const int16_t quant,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr) {
+ int eob = -1;
+
+ if (!skip_block) {
+ const int rc = 0;
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = (coeff >> 31);
+ const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+
+ int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+ tmp = (tmp * quant) >> 15;
+ qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+ dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
+ if (tmp)
+ eob = 0;
+ }
+ *eob_ptr = eob + 1;
+}
+
void vp9_quantize_b_c(const int16_t *coeff_ptr, intptr_t count,
int skip_block,
const int16_t *zbin_ptr, const int16_t *round_ptr,
@@ -167,6 +211,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q)
: vp9_ac_quant(q, 0);
invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant);
+ quants->y_quant_fp[q][i] = (1 << 16) / quant;
quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->y_round[q][i] = (qrounding_factor * quant) >> 7;
cm->y_dequant[q][i] = quant;
@@ -176,6 +221,7 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
: vp9_ac_quant(q, cm->uv_ac_delta_q);
invert_quant(&quants->uv_quant[q][i],
&quants->uv_quant_shift[q][i], quant);
+ quants->uv_quant_fp[q][i] = (1 << 16) / quant;
quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7);
quants->uv_round[q][i] = (qrounding_factor * quant) >> 7;
cm->uv_dequant[q][i] = quant;
@@ -193,12 +239,14 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
for (i = 2; i < 8; i++) {
quants->y_quant[q][i] = quants->y_quant[q][1];
+ quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1];
quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1];
quants->y_zbin[q][i] = quants->y_zbin[q][1];
quants->y_round[q][i] = quants->y_round[q][1];
cm->y_dequant[q][i] = cm->y_dequant[q][1];
quants->uv_quant[q][i] = quants->uv_quant[q][1];
+ quants->uv_quant_fp[q][i] = quants->uv_quant_fp[q][1];
quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1];
quants->uv_zbin[q][i] = quants->uv_zbin[q][1];
quants->uv_round[q][i] = quants->uv_round[q][1];
@@ -227,6 +275,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
// Y
x->plane[0].quant = quants->y_quant[qindex];
+ x->plane[0].quant_fp = quants->y_quant_fp[qindex];
x->plane[0].quant_shift = quants->y_quant_shift[qindex];
x->plane[0].zbin = quants->y_zbin[qindex];
x->plane[0].round = quants->y_round[qindex];
@@ -236,6 +285,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
// UV
for (i = 1; i < 3; i++) {
x->plane[i].quant = quants->uv_quant[qindex];
+ x->plane[i].quant_fp = quants->uv_quant_fp[qindex];
x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
x->plane[i].zbin = quants->uv_zbin[qindex];
x->plane[i].round = quants->uv_round[qindex];
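
The quant_fp tables initialized above hold a 16-bit reciprocal of the quantizer step, (1 << 16) / quant, so vp9_quantize_dc() can quantize with one multiply and shift in place of the zbin/quant/quant_shift chain of vp9_quantize_b_c(). A worked example with illustrative values (the specific numbers are mine, not from the patch):

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical numbers: a DC step of 40 and a round term of 20. */
    static void quant_fp_example(void) {
      const int16_t dequant = 40;
      const int16_t quant_fp = (1 << 16) / dequant;     /* 1638 */
      const int16_t round = 20;
      const int16_t coeff = 170;

      const int tmp = (coeff + round) * quant_fp >> 16; /* 311220 >> 16 = 4 */
      const int dq = tmp * dequant;                     /* 4 * 40 = 160 */
      assert(tmp == 4 && dq == 160);
    }

The 32x32 variant shifts by 15 and halves the dequantized value instead, compensating for the larger scale of the 32x32 transform.
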
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 1835e9ccc..0e90462eb 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -24,6 +24,11 @@ typedef struct {
DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]);
+ // TODO(jingning): re-working the quantization is in progress; decide
+ // whether we want to deprecate the current use of y_quant.
+ DECLARE_ALIGNED(16, int16_t, y_quant_fp[QINDEX_RANGE][8]);
+ DECLARE_ALIGNED(16, int16_t, uv_quant_fp[QINDEX_RANGE][8]);
+
DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]);
DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]);
@@ -37,6 +42,14 @@ typedef struct {
#endif
} QUANTS;
+void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
+ const int16_t *round_ptr, const int16_t quant_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
+void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
+ const int16_t *round_ptr, const int16_t quant_ptr,
+ int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
+ const int16_t dequant_ptr, uint16_t *eob_ptr);
void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
const int16_t *scan, const int16_t *iscan);
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index e8e425661..143c23ba9 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1129,7 +1129,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
- if (oxcf->play_alternate && cpi->refresh_alt_ref_frame &&
+ if (is_altref_enabled(oxcf) && cpi->refresh_alt_ref_frame &&
(cm->frame_type != KEY_FRAME))
// Update the alternate reference frame stats as appropriate.
update_alt_ref_frame_stats(cpi);
@@ -1389,8 +1389,7 @@ void vp9_rc_set_gf_max_interval(const VP9EncoderConfig *const oxcf,
// Extended interval for genuinely static scenes
rc->static_scene_max_gf_interval = oxcf->key_freq >> 1;
- // Special conditions when alt ref frame enabled
- if (oxcf->play_alternate && oxcf->lag_in_frames) {
+ if (is_altref_enabled(oxcf)) {
if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
}
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 8e77dd05b..e110df2c4 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -311,8 +311,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
x->errorperbit += (x->errorperbit == 0);
- x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
- cm->frame_type != KEY_FRAME) ? 0 : 1;
+ x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
+ cm->frame_type != KEY_FRAME) ? 0 : 1;
set_block_thresholds(cm, rd);
@@ -796,11 +796,11 @@ static void txfm_rd_in_plane(MACROBLOCK *x,
}
}
-static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
- int *rate, int64_t *distortion,
- int *skip, int64_t *sse,
- int64_t ref_best_rd,
- BLOCK_SIZE bs) {
+static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
+ int *rate, int64_t *distortion,
+ int *skip, int64_t *sse,
+ int64_t ref_best_rd,
+ BLOCK_SIZE bs) {
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
@@ -815,12 +815,12 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
cpi->tx_stepdown_count[0]++;
}
-static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
- int (*r)[2], int *rate,
- int64_t *d, int64_t *distortion,
- int *s, int *skip,
- int64_t tx_cache[TX_MODES],
- BLOCK_SIZE bs) {
+static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
+ int (*r)[2], int *rate,
+ int64_t *d, int64_t *distortion,
+ int *s, int *skip,
+ int64_t tx_cache[TX_MODES],
+ BLOCK_SIZE bs) {
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -898,12 +898,12 @@ static int64_t scaled_rd_cost(int rdmult, int rddiv,
return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
}
-static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
- int (*r)[2], int *rate,
- int64_t *d, int64_t *distortion,
- int *s, int *skip, int64_t *sse,
- int64_t ref_best_rd,
- BLOCK_SIZE bs) {
+static void choose_tx_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
+ int (*r)[2], int *rate,
+ int64_t *d, int64_t *distortion,
+ int *s, int *skip, int64_t *sse,
+ int64_t ref_best_rd,
+ BLOCK_SIZE bs) {
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -987,8 +987,8 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
- choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
- ref_best_rd, bs);
+ choose_largest_tx_size(cpi, x, rate, distortion, skip, sse, ref_best_rd,
+ bs);
if (psse)
*psse = sse[mbmi->tx_size];
return;
@@ -998,8 +998,8 @@ static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size], &s[tx_size],
&sse[tx_size], ref_best_rd, 0, bs, tx_size,
cpi->sf.use_fast_coef_costing);
- choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
- skip, txfm_cache, bs);
+ choose_tx_size_from_rd(cpi, x, r, rate, d, distortion, s,
+ skip, txfm_cache, bs);
if (psse)
*psse = sse[mbmi->tx_size];
@@ -1017,8 +1017,8 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
assert(bs == mbmi->sb_type);
if (cpi->sf.tx_size_search_method != USE_FULL_RD || xd->lossless) {
vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
- choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
- ref_best_rd, bs);
+ choose_largest_tx_size(cpi, x, rate, distortion, skip, sse, ref_best_rd,
+ bs);
} else {
int r[TX_SIZES][2], s[TX_SIZES];
int64_t d[TX_SIZES];
@@ -1028,8 +1028,8 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
&s[tx_size], &sse[tx_size],
ref_best_rd, 0, bs, tx_size,
cpi->sf.use_fast_coef_costing);
- choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
- skip, txfm_cache, bs);
+ choose_tx_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
+ bs);
}
if (psse)
*psse = sse[mbmi->tx_size];
@@ -1330,7 +1330,7 @@ static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
int64_t ref_best_rd) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
- TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
+ const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi);
int plane;
int pnrate = 0, pnskip = 1;
int64_t pndist = 0, pnsse = 0;
@@ -1351,7 +1351,7 @@ static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
- ref_best_rd, plane, bsize, uv_txfm_size,
+ ref_best_rd, plane, bsize, uv_tx_size,
cpi->sf.use_fast_coef_costing);
if (pnrate == INT_MAX)
goto term;
@@ -1403,7 +1403,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rate_tokenonly = this_rate_tokenonly;
*distortion = this_distortion;
*skippable = s;
- if (!x->select_txfm_size)
+ if (!x->select_tx_size)
swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
}
}
@@ -1850,9 +1850,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
- bestsme = full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
- sadpb, &bsi->ref_mv[0]->as_mv, new_mv,
- INT_MAX, 1);
+ bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
+ sadpb, &bsi->ref_mv[0]->as_mv, new_mv,
+ INT_MAX, 1);
// Should we do a full search (best quality only)
if (is_best_mode(cpi->oxcf.mode)) {
@@ -2385,8 +2385,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.col >>= 3;
mvp_full.row >>= 3;
- bestsme = full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
- &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
+ bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
+ &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
x->mv_col_min = tmp_col_min;
x->mv_col_max = tmp_col_max;
@@ -2802,7 +2802,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rate2 += vp9_get_switchable_rate(cpi);
if (!is_comp_pred) {
- if (!x->in_active_map) {
+ if (!x->in_active_map ||
+ vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
if (psse)
*psse = 0;
*distortion = 0;
@@ -3119,9 +3120,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// If the segment skip feature is enabled....
// then do nothing if the current mode is not allowed..
if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
- const int inter_non_zero_mode_mask = 0x1F7F7;
- mode_skip_mask |= inter_non_zero_mode_mask;
- mode_skip_mask &= ~(1 << THR_ZEROMV);
+ mode_skip_mask = ~(1 << THR_ZEROMV);
inter_mode_mask = (1 << ZEROMV);
}
@@ -3442,7 +3441,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_rd = this_rd;
best_mbmode = *mbmi;
best_skip2 = this_skip2;
- if (!x->select_txfm_size)
+ if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(uint8_t) * ctx->num_4x4_blk);
@@ -4072,7 +4071,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
best_mbmode = *mbmi;
best_skip2 = this_skip2;
- if (!x->select_txfm_size)
+ if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
sizeof(uint8_t) * ctx->num_4x4_blk);
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index e85d08a6d..6e5631795 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -94,52 +94,6 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
}
-static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize, MV *mvp_full,
- int step_param, int error_per_bit,
- const MV *ref_mv, MV *tmp_mv,
- int var_max, int rd) {
- const SPEED_FEATURES *const sf = &cpi->sf;
- const SEARCH_METHODS method = sf->search_method;
- vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
- int var = 0;
-
- switch (method) {
- case FAST_DIAMOND:
- var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
- fn_ptr, 1, ref_mv, tmp_mv);
- break;
- case FAST_HEX:
- var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
- fn_ptr, 1, ref_mv, tmp_mv);
- break;
- case HEX:
- var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1,
- fn_ptr, 1, ref_mv, tmp_mv);
- break;
- case SQUARE:
- var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1,
- fn_ptr, 1, ref_mv, tmp_mv);
- break;
- case BIGDIA:
- var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1,
- fn_ptr, 1, ref_mv, tmp_mv);
- break;
- case NSTEP:
- var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
- (sf->max_step_search_steps - 1) - step_param,
- 1, fn_ptr, ref_mv, tmp_mv);
- break;
- default:
- assert(!"Invalid search method.");
- }
-
- if (method != NSTEP && rd && var < var_max)
- var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
-
- return var;
-}
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index 1f58d872e..487deef42 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -12,6 +12,35 @@
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vpx_ports/mem.h"
+void vp9_fdct4x4_1_sse2(const int16_t *input, int16_t *output, int stride) {
+ __m128i in0, in1;
+ __m128i tmp;
+ const __m128i zero = _mm_setzero_si128();
+ in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
+ in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
+ in1 = _mm_unpacklo_epi64(in1, _mm_loadl_epi64((const __m128i *)
+ (input + 2 * stride)));
+ in0 = _mm_unpacklo_epi64(in0, _mm_loadl_epi64((const __m128i *)
+ (input + 3 * stride)));
+
+ tmp = _mm_add_epi16(in0, in1);
+ in0 = _mm_unpacklo_epi16(zero, tmp);
+ in1 = _mm_unpackhi_epi16(zero, tmp);
+ in0 = _mm_srai_epi32(in0, 16);
+ in1 = _mm_srai_epi32(in1, 16);
+
+ tmp = _mm_add_epi32(in0, in1);
+ in0 = _mm_unpacklo_epi32(tmp, zero);
+ in1 = _mm_unpackhi_epi32(tmp, zero);
+
+ tmp = _mm_add_epi32(in0, in1);
+ in0 = _mm_srli_si128(tmp, 8);
+
+ in1 = _mm_add_epi32(tmp, in0);
+ in0 = _mm_slli_epi32(in1, 1);
+ _mm_store_si128((__m128i *)(output), in0);
+}
+
void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
// This 2D transform implements 4 vertical 1D transforms followed
// by 4 horizontal 1D transforms. The multiplies and adds are as given
@@ -377,6 +406,46 @@ void vp9_fht4x4_sse2(const int16_t *input, int16_t *output,
}
}
+void vp9_fdct8x8_1_sse2(const int16_t *input, int16_t *output, int stride) {
+ __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+ __m128i u0, u1, sum;
+
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+
+ sum = _mm_add_epi16(u0, u1);
+
+ in0 = _mm_add_epi16(in0, in1);
+ in2 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, in0);
+
+ u0 = _mm_setzero_si128();
+ sum = _mm_add_epi16(sum, in2);
+
+ in0 = _mm_unpacklo_epi16(u0, sum);
+ in1 = _mm_unpackhi_epi16(u0, sum);
+ in0 = _mm_srai_epi32(in0, 16);
+ in1 = _mm_srai_epi32(in1, 16);
+
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_unpacklo_epi32(sum, u0);
+ in1 = _mm_unpackhi_epi32(sum, u0);
+
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_srli_si128(sum, 8);
+
+ in1 = _mm_add_epi32(sum, in0);
+ _mm_store_si128((__m128i *)(output), in1);
+}
+
void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
int pass;
// Constants
@@ -1168,6 +1237,74 @@ void vp9_fht8x8_sse2(const int16_t *input, int16_t *output,
}
}
+void vp9_fdct16x16_1_sse2(const int16_t *input, int16_t *output, int stride) {
+ __m128i in0, in1, in2, in3;
+ __m128i u0, u1;
+ __m128i sum = _mm_setzero_si128();
+ int i;
+
+ for (i = 0; i < 2; ++i) {
+ input += 8 * i;
+ in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
+
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));
+
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 8 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 9 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 10 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 11 * stride));
+
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 12 * stride));
+ in1 = _mm_load_si128((const __m128i *)(input + 13 * stride));
+ in2 = _mm_load_si128((const __m128i *)(input + 14 * stride));
+ in3 = _mm_load_si128((const __m128i *)(input + 15 * stride));
+
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ sum = _mm_add_epi16(sum, u1);
+ }
+
+ u0 = _mm_setzero_si128();
+ in0 = _mm_unpacklo_epi16(u0, sum);
+ in1 = _mm_unpackhi_epi16(u0, sum);
+ in0 = _mm_srai_epi32(in0, 16);
+ in1 = _mm_srai_epi32(in1, 16);
+
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_unpacklo_epi32(sum, u0);
+ in1 = _mm_unpackhi_epi32(sum, u0);
+
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_srli_si128(sum, 8);
+
+ in1 = _mm_add_epi32(sum, in0);
+ in1 = _mm_srai_epi32(in1, 1);
+ _mm_store_si128((__m128i *)(output), in1);
+}
+
void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
@@ -2680,6 +2817,77 @@ void vp9_fht16x16_sse2(const int16_t *input, int16_t *output,
}
}
+void vp9_fdct32x32_1_sse2(const int16_t *input, int16_t *output, int stride) {
+ __m128i in0, in1, in2, in3;
+ __m128i u0, u1;
+ __m128i sum = _mm_setzero_si128();
+ int i;
+
+ for (i = 0; i < 8; ++i) {
+ in0 = _mm_load_si128((const __m128i *)(input + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 16));
+ in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+ input += stride;
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 16));
+ in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+ input += stride;
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 16));
+ in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+ input += stride;
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ in0 = _mm_load_si128((const __m128i *)(input + 0));
+ in1 = _mm_load_si128((const __m128i *)(input + 8));
+ in2 = _mm_load_si128((const __m128i *)(input + 16));
+ in3 = _mm_load_si128((const __m128i *)(input + 24));
+
+ input += stride;
+ sum = _mm_add_epi16(sum, u1);
+ u0 = _mm_add_epi16(in0, in1);
+ u1 = _mm_add_epi16(in2, in3);
+ sum = _mm_add_epi16(sum, u0);
+
+ sum = _mm_add_epi16(sum, u1);
+ }
+
+ u0 = _mm_setzero_si128();
+ in0 = _mm_unpacklo_epi16(u0, sum);
+ in1 = _mm_unpackhi_epi16(u0, sum);
+ in0 = _mm_srai_epi32(in0, 16);
+ in1 = _mm_srai_epi32(in1, 16);
+
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_unpacklo_epi32(sum, u0);
+ in1 = _mm_unpackhi_epi32(sum, u0);
+
+ sum = _mm_add_epi32(in0, in1);
+ in0 = _mm_srli_si128(sum, 8);
+
+ in1 = _mm_add_epi32(sum, in0);
+ in1 = _mm_srai_epi32(in1, 3);
+ _mm_store_si128((__m128i *)(output), in1);
+}
+
#define FDCT32x32_2D vp9_fdct32x32_rd_sse2
#define FDCT32x32_HIGH_PRECISION 0
#include "vp9/encoder/x86/vp9_dct32x32_sse2.c"
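
All four SSE2 kernels above end with the same reduction tail: interleaving the eight 16-bit partial sums with zeros into the upper halves of 32-bit lanes and arithmetic-shifting right by 16 is just a sign extension, after which two unpack/add rounds collapse the vector to a single 32-bit total. A scalar sketch of what that tail computes (my restatement, not code from this change):

    #include <stdint.h>

    /* Scalar equivalent of the shared reduction tail: sign-extend eight
     * int16 partial sums to int32 and add them up; each kernel then
     * applies its own per-size shift before storing output[0]. */
    static int32_t hsum_epi16(const int16_t lanes[8]) {
      int32_t total = 0;
      int i;
      for (i = 0; i < 8; ++i)
        total += lanes[i];
      return total;
    }
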