summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Yunqing Wang <yunqingwang@google.com> 2014-06-25 08:18:33 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2014-06-25 08:18:33 -0700
commit: bccc785f63458136a8a72a51ed7b34a5811ffc4f (patch)
tree: b6e7e9cec008a521af0d3ab72be35cd128879ba7
parent: b8c382f8e76e0ee313b15e61ac658271e2047849 (diff)
parent: 0aae10007684fc3aa84ebf38a1a1d20e12ca8930 (diff)
download: libvpx-bccc785f63458136a8a72a51ed7b34a5811ffc4f.tar
libvpx-bccc785f63458136a8a72a51ed7b34a5811ffc4f.tar.gz
libvpx-bccc785f63458136a8a72a51ed7b34a5811ffc4f.tar.bz2
libvpx-bccc785f63458136a8a72a51ed7b34a5811ffc4f.zip
Merge "Reuse inter prediction result in real-time speed 6"
-rw-r--r-- vp9/encoder/vp9_encodeframe.c | 5
-rw-r--r-- vp9/encoder/vp9_pickmode.c | 108
-rw-r--r-- vp9/encoder/vp9_pickmode.h | 6
-rw-r--r-- vp9/encoder/vp9_speed_features.c | 4
-rw-r--r-- vp9/encoder/vp9_speed_features.h | 5
5 files changed, 124 insertions, 4 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f031c18d9..b9349a49a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3318,7 +3318,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
&xd->block_refs[ref]->sf);
}
- vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
+ if (!cpi->sf.reuse_inter_pred_sby)
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
+
+ vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
if (!x->skip) {
mbmi->skip = 1;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 29896aa9e..a86053ff4 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -23,6 +23,7 @@
#include "vp9/common/vp9_reconintra.h"
#include "vp9/encoder/vp9_encoder.h"
+#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rdopt.h"
@@ -183,6 +184,22 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
*out_dist_sum += dist << 4;
}
+// Claims the first entry of p[0..len-1] that is not in use: marks it in use
+// and returns its index. Returns -1 when no entry is free.
+static int get_pred_buffer(PRED_BUFFER *p, int len) {
+  int slot = 0;
+
+  // Skip the entries that are already claimed.
+  while (slot < len && p[slot].in_use) ++slot;
+  if (slot >= len) return -1;  // Every entry is taken, or len <= 0.
+  p[slot].in_use = 1;
+  return slot;
+}
+
+static void free_pred_buffer(PRED_BUFFER *p) {  // Clears p's in-use mark.
+  if (p != NULL) p->in_use = 0;  // NULL (no buffer held) is a no-op.
+}
+
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
// this needs various further optimizations. to be continued..
int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -229,6 +246,31 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const int pred_filter_search = (((mi_row + mi_col) >> bsl) +
get_chessboard_index(cm)) % 2;
+ // For speed 6, the result of interp filter is reused later in actual encoding
+ // process.
+ int bh = num_4x4_blocks_high_lookup[bsize] << 2;
+ int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
+ int pixels_in_block = bh * bw;
+ // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
+ PRED_BUFFER tmp[4];
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, pred_buf, 3 * 64 * 64);
+ struct buf_2d orig_dst = pd->dst;
+ PRED_BUFFER *best_pred = NULL;
+ PRED_BUFFER *this_mode_pred = NULL;
+ int i;
+
+ if (cpi->sf.reuse_inter_pred_sby) {
+ for (i = 0; i < 3; i++) {
+ tmp[i].data = &pred_buf[pixels_in_block * i];
+ tmp[i].stride = bw;
+ tmp[i].in_use = 0;
+ }
+
+ tmp[3].data = pd->dst.buf;
+ tmp[3].stride = pd->dst.stride;
+ tmp[3].in_use = 0;
+ }
+
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
x->skip = 0;
@@ -324,6 +366,16 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Search for the best prediction filter type, when the resulting
// motion vector is at sub-pixel accuracy level for luma component, i.e.,
// the last three bits are not all zeros.
+ if (cpi->sf.reuse_inter_pred_sby) {
+ if (this_mode == NEARESTMV) {
+ this_mode_pred = &tmp[3];
+ } else {
+ this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
+ pd->dst.buf = this_mode_pred->data;
+ pd->dst.stride = bw;
+ }
+ }
+
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
pred_filter_search &&
((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
@@ -334,6 +386,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
unsigned int pf_sse[3];
int64_t best_cost = INT64_MAX;
INTERP_FILTER best_filter = SWITCHABLE, filter;
+ PRED_BUFFER *current_pred = this_mode_pred;
for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) {
int64_t cost;
@@ -345,12 +398,28 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
vp9_get_switchable_rate(cpi) + pf_rate[filter],
pf_dist[filter]);
if (cost < best_cost) {
- best_filter = filter;
- best_cost = cost;
- skip_txfm = x->skip_txfm;
+ best_filter = filter;
+ best_cost = cost;
+ skip_txfm = x->skip_txfm;
+
+ if (cpi->sf.reuse_inter_pred_sby) {
+ if (this_mode_pred != current_pred) {
+ free_pred_buffer(this_mode_pred);
+ this_mode_pred = current_pred;
+ }
+
+ if (filter < EIGHTTAP_SHARP) {
+ current_pred = &tmp[get_pred_buffer(tmp, 3)];
+ pd->dst.buf = current_pred->data;
+ pd->dst.stride = bw;
+ }
+ }
}
}
+ if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred)
+ free_pred_buffer(current_pred);
+
mbmi->interp_filter = best_filter;
rate = pf_rate[mbmi->interp_filter];
dist = pf_dist[mbmi->interp_filter];
@@ -449,6 +518,16 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
best_pred_filter = mbmi->interp_filter;
best_ref_frame = ref_frame;
skip_txfm = x->skip_txfm;
+
+ if (cpi->sf.reuse_inter_pred_sby) {
+ if (best_pred != NULL)
+ free_pred_buffer(best_pred);
+
+ best_pred = this_mode_pred;
+ }
+ } else {
+ if (cpi->sf.reuse_inter_pred_sby)
+ free_pred_buffer(this_mode_pred);
}
if (x->skip)
@@ -456,6 +535,19 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
+ // If best prediction is not in dst buf, then copy the prediction block from
+ // temp buf to dst buf.
+ if (cpi->sf.reuse_inter_pred_sby && best_pred->data != orig_dst.buf) {
+ uint8_t *copy_from, *copy_to;
+
+ pd->dst = orig_dst;
+ copy_to = pd->dst.buf;
+
+ copy_from = best_pred->data;
+
+ vp9_convolve_copy(copy_from, bw, copy_to, pd->dst.stride, NULL, 0, NULL, 0,
+ bw, bh);
+ }
mbmi->mode = best_mode;
mbmi->interp_filter = best_pred_filter;
@@ -469,12 +561,21 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (!x->skip && best_rd > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize) {
for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
+ if (cpi->sf.reuse_inter_pred_sby) {
+ pd->dst.buf = tmp[0].data;
+ pd->dst.stride = bw;
+ }
+
vp9_predict_intra_block(xd, 0, b_width_log2(bsize),
mbmi->tx_size, this_mode,
&p->src.buf[0], p->src.stride,
&pd->dst.buf[0], pd->dst.stride, 0, 0, 0);
model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
+
+ if (cpi->sf.reuse_inter_pred_sby)
+ pd->dst = orig_dst;
+
rate += cpi->mbmode_cost[this_mode];
rate += intra_cost_penalty;
this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
@@ -492,6 +593,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
}
}
}
+
#if CONFIG_DENOISING
vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, bsize);
#endif
diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h
index a9c948d31..3d89974fc 100644
--- a/vp9/encoder/vp9_pickmode.h
+++ b/vp9/encoder/vp9_pickmode.h
@@ -17,6 +17,12 @@
extern "C" {
#endif
+typedef struct {  // One prediction buffer slot plus its bookkeeping flag.
+ uint8_t *data;  // Start of the buffer's pixel storage.
+ int stride;  // Stride that goes with data; presumably bytes per row.
+ int in_use;  // Nonzero while the slot is claimed, 0 when it is free.
+} PRED_BUFFER;
+
int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const struct TileInfo *const tile,
int mi_row, int mi_col,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 9c3fb5ea0..d7017f269 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -277,6 +277,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
// is checked for a partition block. Later, we can try to allow large
// partitions to do intra mode checking.
sf->max_intra_bsize = BLOCK_8X8;
+
+ // This feature is only enabled when partition search is disabled.
+ sf->reuse_inter_pred_sby = 1;
}
if (speed >= 7) {
@@ -342,6 +345,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
sf->max_intra_bsize = BLOCK_64X64;
+ sf->reuse_inter_pred_sby = 0;
// This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION.
sf->always_this_block_size = BLOCK_16X16;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index f5d0b85e2..75070a70f 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -353,6 +353,11 @@ typedef struct SPEED_FEATURES {
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
unsigned int source_var_thresh;
+
+ // When partition is pre-set, the inter prediction result from pick_inter_mode
+ // can be reused in final block encoding process. It is enabled only for real-
+ // time mode speed 6.
+ int reuse_inter_pred_sby;
} SPEED_FEATURES;
struct VP9_COMP;