summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
authorJingning Han <jingning@google.com>2014-03-06 18:56:50 -0800
committerJingning Han <jingning@google.com>2014-03-06 23:00:48 -0800
commit539c961ed4825bace64647e122893a77eeb9a0c4 (patch)
treef8cf01b0f952911bb8719c582c0cf3071269f640 /vp9
parent90a8dad162ab678e3f669c11ec7624b8fd00e94b (diff)
downloadlibvpx-539c961ed4825bace64647e122893a77eeb9a0c4.tar
libvpx-539c961ed4825bace64647e122893a77eeb9a0c4.tar.gz
libvpx-539c961ed4825bace64647e122893a77eeb9a0c4.tar.bz2
libvpx-539c961ed4825bace64647e122893a77eeb9a0c4.zip
Use modeled rate distortion costs for non-RD mode
This commit replaces SAD cost with modeled rate-distortion cost for non-RD mode decision. It translates the prediction residual SSE into estimated rate and reconstruction distortion costs, thereby capturing the effect of the quantization setting. The compression performance of speed -7 for rtc set is improved by 14.79%. Change-Id: Ifda014eb0501d13109fe7f92680bf1410b463632
Diffstat (limited to 'vp9')
-rw-r--r--vp9/encoder/vp9_encodeframe.c3
-rw-r--r--vp9/encoder/vp9_pickmode.c72
-rw-r--r--vp9/encoder/vp9_rdopt.c18
-rw-r--r--vp9/encoder/vp9_rdopt.h4
4 files changed, 63 insertions, 34 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index e29fbd64a..435abddbe 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2010,7 +2010,6 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
-
cpi->mb.source_variance = UINT_MAX;
if (cpi->sf.partition_search_type == FIXED_PARTITION) {
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
@@ -2492,7 +2491,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start;
mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) {
- if (cpi->sf.use_nonrd_pick_mode)
+ if (cpi->sf.use_nonrd_pick_mode && cm->frame_type != KEY_FRAME)
encode_nonrd_sb_row(cpi, &tile, mi_row, &tp);
else
encode_rd_sb_row(cpi, &tile, mi_row, &tp);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 75122bc6f..4df489988 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -187,6 +187,38 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
}
}
+static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
+ MACROBLOCK *x, MACROBLOCKD *xd,
+ int *out_rate_sum, int64_t *out_dist_sum) {
+ // Note our transform coeffs are 8 times an orthogonal transform.
+ // Hence quantizer step is also 8 times. To get effective quantizer
+ // we need to divide by 8 before sending to modeling function.
+ int64_t rate_sum = 0;
+ int64_t dist_sum = 0;
+ unsigned int sse;
+
+
+ struct macroblock_plane *const p = &x->plane[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
+
+ (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, &sse);
+
+ {
+ int rate;
+ int64_t dist;
+ vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+ pd->dequant[1] >> 3, &rate, &dist);
+ rate_sum += rate;
+ dist_sum += dist;
+ }
+
+
+ *out_rate_sum = (int)rate_sum;
+ *out_dist_sum = dist_sum << 4;
+}
+
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
// this needs various further optimizations. to be continued..
int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -208,11 +240,13 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
VP9_ALT_FLAG };
int64_t best_rd = INT64_MAX;
int64_t this_rd = INT64_MAX;
- static const int cost[4]= { 0, 2, 4, 6 };
const int64_t inter_mode_thresh = 300;
const int64_t intra_mode_cost = 50;
+ int rate = INT_MAX;
+ int64_t dist = INT64_MAX;
+
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
x->skip = 0;
@@ -245,7 +279,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
- int rate_mv = 0;
if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
continue;
@@ -258,9 +291,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame[0] = ref_frame;
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
- int rate = cost[INTER_OFFSET(this_mode)]
- << (num_pels_log2_lookup[bsize] - 4);
- int64_t dist;
+ int rate_mv = 0;
+
if (cpi->sf.disable_inter_mode_mask[bsize] &
(1 << INTER_OFFSET(this_mode)))
continue;
@@ -280,22 +312,15 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
&frame_mv[NEWMV][ref_frame].as_mv);
}
- if (frame_mv[this_mode][ref_frame].as_int == 0) {
- dist = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)];
- } else if (this_mode != NEARESTMV &&
- frame_mv[NEARESTMV][ref_frame].as_int ==
- frame_mv[this_mode][ref_frame].as_int) {
- dist = x->mode_sad[ref_frame][INTER_OFFSET(NEARESTMV)];
- } else {
- mbmi->mode = this_mode;
- mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
- vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
- dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] =
- cpi->fn_ptr[bsize].sdf(p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride, INT_MAX);
- }
+ mbmi->mode = this_mode;
+ mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
- this_rd = rate + dist;
+ model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+ rate += rate_mv;
+ rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]]
+ [INTER_OFFSET(this_mode)];
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
if (this_rd < best_rd) {
best_rd = this_rd;
@@ -319,10 +344,9 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
&p->src.buf[0], p->src.stride,
&pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
- this_rd = cpi->fn_ptr[bsize].sdf(p->src.buf,
- p->src.stride,
- pd->dst.buf,
- pd->dst.stride, INT_MAX);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist);
+ rate += x->mbmode_cost[this_mode];
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
if (this_rd + intra_mode_cost < best_rd) {
best_rd = this_rd;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index e1230cabc..37390e41a 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -287,7 +287,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
set_block_thresholds(cpi);
- if (!cpi->sf.use_nonrd_pick_mode) {
+ if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
fill_token_costs(x->token_costs, cm->fc.coef_probs);
for (i = 0; i < PARTITION_CONTEXTS; i++)
@@ -295,7 +295,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
vp9_partition_tree);
}
- if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) {
+ if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
+ cm->frame_type == KEY_FRAME) {
fill_mode_costs(cpi);
if (!frame_is_intra_only(cm)) {
@@ -394,9 +395,9 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
*d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
-static void model_rd_from_var_lapndz(unsigned int var, unsigned int n,
- unsigned int qstep, int *rate,
- int64_t *dist) {
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+ unsigned int qstep, int *rate,
+ int64_t *dist) {
// This function models the rate and distortion for a Laplacian
// source with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expressions are in:
@@ -458,8 +459,8 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
} else {
int rate;
int64_t dist;
- model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
- pd->dequant[1] >> 3, &rate, &dist);
+ vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+ pd->dequant[1] >> 3, &rate, &dist);
rate_sum += rate;
dist_sum += dist;
}
@@ -506,7 +507,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
&pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
&sse);
// sse works better than var, since there is no dc prediction used
- model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
+ vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
+ &rate, &dist);
rate_sum += rate;
dist_sum += dist;
*out_skip &= (rate < 1024);
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index b5baa337d..7b88e582b 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -41,6 +41,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi);
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+ unsigned int qstep, int *rate,
+ int64_t *dist);
+
void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
const TileInfo *const tile,
MV_REFERENCE_FRAME ref_frame,