summary | refs | log | tree | commit | diff
path: root/vp9/encoder
diff options
context:
space:
mode:
author: Deb Mukherjee <debargha@google.com> 2013-05-30 08:10:47 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-05-30 08:10:47 -0700
commit: c98bfcfbbb23dc7c8c8641831922877d0d31c67e (patch)
tree: c700b805ddde78db701295cd2b968903f460b30b /vp9/encoder
parent: ecf023f6e483706a4bf352b1dfcd9fb996ca4555 (diff)
parent: b8b3f1a46d756bef73cb4f3cd6990a01cbacaa25 (diff)
download: libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.tar
libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.tar.gz
libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.tar.bz2
libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.zip
Merge "Balancing coef-tree to reduce bool decodes" into experimental
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- vp9/encoder/vp9_bitstream.c 29
-rw-r--r-- vp9/encoder/vp9_block.h 4
-rw-r--r-- vp9/encoder/vp9_boolhuff.c 1
-rw-r--r-- vp9/encoder/vp9_encodemb.c 38
-rw-r--r-- vp9/encoder/vp9_onyx_if.c 4
-rw-r--r-- vp9/encoder/vp9_rdopt.c 66
-rw-r--r-- vp9/encoder/vp9_tokenize.c 10
7 files changed, 138 insertions, 14 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 40489d59c..e4db5c1dc 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -473,7 +473,6 @@ static void pack_mb_tokens(vp9_writer* const bc,
const vp9_prob *pp;
int v = a->value;
int n = a->len;
- int ncount = n;
vp9_prob probs[ENTROPY_NODES];
if (t == EOSB_TOKEN) {
@@ -489,18 +488,25 @@ static void pack_mb_tokens(vp9_writer* const bc,
assert(pp != 0);
/* skip one or two nodes */
+#if !CONFIG_BALANCED_COEFTREE
if (p->skip_eob_node) {
n -= p->skip_eob_node;
i = 2 * p->skip_eob_node;
- ncount -= p->skip_eob_node;
}
+#endif
do {
const int bb = (v >> --n) & 1;
+#if CONFIG_BALANCED_COEFTREE
+ if (i == 2 && p->skip_eob_node) {
+ i += 2;
+ assert(bb == 1);
+ continue;
+ }
+#endif
vp9_write(bc, bb, pp[i >> 1]);
i = vp9_coef_tree[i + bb];
- ncount--;
- } while (n && ncount);
+ } while (n);
if (b->base_val) {
const int e = p->extra, l = b->len;
@@ -862,8 +868,11 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
const MB_PREDICTION_MODE A = above_block_mode(m, i, mis);
const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
left_block_mode(m, i) : DC_PRED;
- write_kf_bmode(bc, m->bmi[i].as_mode.first,
- c->kf_bmode_prob[A][L]);
+ const int bm = m->bmi[i].as_mode.first;
+#ifdef ENTROPY_STATS
+ ++intra_mode_stats[A][L][bm];
+#endif
+ write_kf_bmode(bc, bm, c->kf_bmode_prob[A][L]);
}
}
}
@@ -1066,11 +1075,19 @@ static void build_tree_distribution(vp9_coeff_probs_model *coef_probs,
coef_probs[i][j][k][l],
coef_branch_ct[i][j][k][l],
model_counts, 0);
+#if CONFIG_BALANCED_COEFTREE
+ coef_branch_ct[i][j][k][l][1][1] = eob_branch_ct[i][j][k][l] -
+ coef_branch_ct[i][j][k][l][1][0];
+ coef_probs[i][j][k][l][1] =
+ get_binary_prob(coef_branch_ct[i][j][k][l][1][0],
+ coef_branch_ct[i][j][k][l][1][1]);
+#else
coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] -
coef_branch_ct[i][j][k][l][0][0];
coef_probs[i][j][k][l][0] =
get_binary_prob(coef_branch_ct[i][j][k][l][0][0],
coef_branch_ct[i][j][k][l][0][1]);
+#endif
#ifdef ENTROPY_STATS
if (!cpi->dummy_packing) {
for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 0e9b6804c..cf34cc1f7 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -133,7 +133,11 @@ struct macroblock {
unsigned char *active_ptr;
+ // note that token_costs is the cost when eob node is skipped
vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES];
+#if CONFIG_BALANCED_COEFTREE
+ vp9_coeff_count token_costs_noskip[TX_SIZE_MAX_SB][BLOCK_TYPES];
+#endif
int optimize;
diff --git a/vp9/encoder/vp9_boolhuff.c b/vp9/encoder/vp9_boolhuff.c
index 0fcb2579f..86143ca57 100644
--- a/vp9/encoder/vp9_boolhuff.c
+++ b/vp9/encoder/vp9_boolhuff.c
@@ -10,6 +10,7 @@
#include <assert.h>
#include "vp9/encoder/vp9_boolhuff.h"
+#include "vp9/common/vp9_entropy.h"
#if defined(SECTIONBITS_OUTPUT)
unsigned __int64 Sectionbits[500];
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 755ff21bf..98ea98031 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -215,10 +215,21 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
band = get_coef_band(band_translate, i + 1);
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
pad, default_eob);
+#if CONFIG_BALANCED_COEFTREE
rate0 +=
- mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token];
+ mb->token_costs_noskip[tx_size][type][ref][band][pt]
+ [tokens[next][0].token];
rate1 +=
- mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token];
+ mb->token_costs_noskip[tx_size][type][ref][band][pt]
+ [tokens[next][1].token];
+#else
+ rate0 +=
+ mb->token_costs[tx_size][type][ref][band][pt]
+ [tokens[next][0].token];
+ rate1 +=
+ mb->token_costs[tx_size][type][ref][band][pt]
+ [tokens[next][1].token];
+#endif
}
UPDATE_RD_COST();
/* And pick the best. */
@@ -266,14 +277,32 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
if (t0 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
pad, default_eob);
+#if CONFIG_BALANCED_COEFTREE
+ if (!x)
+ rate0 += mb->token_costs[tx_size][type][ref][band][pt][
+ tokens[next][0].token];
+ else
+ rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
+ tokens[next][0].token];
+#else
rate0 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][0].token];
+#endif
}
if (t1 != DCT_EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
pad, default_eob);
+#if CONFIG_BALANCED_COEFTREE
+ if (!x)
+ rate1 += mb->token_costs[tx_size][type][ref][band][pt][
+ tokens[next][1].token];
+ else
+ rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
+ tokens[next][1].token];
+#else
rate1 += mb->token_costs[tx_size][type][ref][band][pt][
tokens[next][1].token];
+#endif
}
}
@@ -326,8 +355,13 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
+#if CONFIG_BALANCED_COEFTREE
+ rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t0];
+ rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t1];
+#else
rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
+#endif
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = i0 - 1;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index b484925bb..d9c34fe9b 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -284,7 +284,6 @@ static void setup_features(VP9_COMP *cpi) {
set_default_lf_deltas(cpi);
}
-
static void dealloc_compressor_data(VP9_COMP *cpi) {
// Delete sementation map
vpx_free(cpi->segmentation_map);
@@ -2935,9 +2934,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
#endif
// transform / motion compensation build reconstruction frame
- if (cm->frame_type == KEY_FRAME) {
- vp9_default_coef_probs(cm);
- }
vp9_encode_frame(cpi);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 52b4cc39e..87d560d4d 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -108,11 +108,31 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{SPLITMV, GOLDEN_FRAME, ALTREF_FRAME},
};
+#if CONFIG_BALANCED_COEFTREE
+static void fill_token_costs(vp9_coeff_count *c,
+ vp9_coeff_count *cnoskip,
+ vp9_coeff_probs_model *p,
+ TX_SIZE tx_size) {
+ int i, j, k, l;
+ for (i = 0; i < BLOCK_TYPES; i++)
+ for (j = 0; j < REF_TYPES; j++)
+ for (k = 0; k < COEF_BANDS; k++)
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+ vp9_prob probs[ENTROPY_NODES];
+ vp9_model_to_full_probs(p[i][j][k][l], probs);
+ vp9_cost_tokens((int *)cnoskip[i][j][k][l], probs,
+ vp9_coef_tree);
+ // Replace the eob node prob with a very small value so that the
+ // cost approximately equals the cost without the eob node
+ probs[1] = 1;
+ vp9_cost_tokens((int *)c[i][j][k][l], probs, vp9_coef_tree);
+ }
+}
+#else
static void fill_token_costs(vp9_coeff_count *c,
vp9_coeff_probs_model *p,
TX_SIZE tx_size) {
int i, j, k, l;
-
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < REF_TYPES; j++)
for (k = 0; k < COEF_BANDS; k++)
@@ -123,6 +143,7 @@ static void fill_token_costs(vp9_coeff_count *c,
vp9_coef_tree);
}
}
+#endif
static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -213,14 +234,29 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
}
}
+#if CONFIG_BALANCED_COEFTREE
fill_token_costs(cpi->mb.token_costs[TX_4X4],
+ cpi->mb.token_costs_noskip[TX_4X4],
cpi->common.fc.coef_probs_4x4, TX_4X4);
fill_token_costs(cpi->mb.token_costs[TX_8X8],
+ cpi->mb.token_costs_noskip[TX_8X8],
cpi->common.fc.coef_probs_8x8, TX_8X8);
fill_token_costs(cpi->mb.token_costs[TX_16X16],
+ cpi->mb.token_costs_noskip[TX_16X16],
cpi->common.fc.coef_probs_16x16, TX_16X16);
fill_token_costs(cpi->mb.token_costs[TX_32X32],
+ cpi->mb.token_costs_noskip[TX_32X32],
cpi->common.fc.coef_probs_32x32, TX_32X32);
+#else
+ fill_token_costs(cpi->mb.token_costs[TX_4X4],
+ cpi->common.fc.coef_probs_4x4, TX_4X4);
+ fill_token_costs(cpi->mb.token_costs[TX_8X8],
+ cpi->common.fc.coef_probs_8x8, TX_8X8);
+ fill_token_costs(cpi->mb.token_costs[TX_16X16],
+ cpi->common.fc.coef_probs_16x16, TX_16X16);
+ fill_token_costs(cpi->mb.token_costs[TX_32X32],
+ cpi->common.fc.coef_probs_32x32, TX_32X32);
+#endif
for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
vp9_cost_tokens(cpi->mb.partition_cost[i],
@@ -274,7 +310,13 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
TX_TYPE tx_type = DCT_DCT;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
+#if CONFIG_BALANCED_COEFTREE
+ unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
+ mb->token_costs_noskip[tx_size][type][ref];
+#else
vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+#endif
+
int seg_eob, default_eob;
uint8_t token_cache[1024];
const uint8_t * band_translate;
@@ -294,8 +336,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
get_tx_type_4x4(xd, block) : DCT_DCT;
above_ec = A[0] != 0;
left_ec = L[0] != 0;
+#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
coef_probs);
+#endif
seg_eob = 16;
scan = get_scan_4x4(tx_type);
band_translate = vp9_coefband_trans_4x4;
@@ -310,8 +354,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
above_ec = (A[0] + A[1]) != 0;
left_ec = (L[0] + L[1]) != 0;
scan = get_scan_8x8(tx_type);
+#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
coef_probs);
+#endif
seg_eob = 64;
band_translate = vp9_coefband_trans_8x8plus;
break;
@@ -323,8 +369,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
scan = get_scan_16x16(tx_type);
+#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
coef_probs);
+#endif
seg_eob = 256;
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
@@ -333,8 +381,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
}
case TX_32X32:
scan = vp9_default_scan_32x32;
+#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
coef_probs);
+#endif
seg_eob = 1024;
above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
@@ -365,18 +415,30 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+#if CONFIG_BALANCED_COEFTREE
+ if (!c || token_cache[scan[c - 1]]) // do not skip eob
+ cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
+ else
+ cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
+#else
cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
-
if (!c || token_cache[scan[c - 1]])
cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
+#endif
token_cache[scan[c]] = vp9_pt_energy_class[t];
}
if (c < seg_eob) {
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+#if CONFIG_BALANCED_COEFTREE
+ cost += mb->token_costs_noskip[tx_size][type][ref]
+ [get_coef_band(band_translate, c)]
+ [pt][DCT_EOB_TOKEN];
+#else
cost += mb->token_costs[tx_size][type][ref]
[get_coef_band(band_translate, c)]
[pt][DCT_EOB_TOKEN];
+#endif
}
}
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index eb79de1d9..79f72bb4b 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -224,11 +224,21 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
t->token = token;
t->context_tree = coef_probs[type][ref][band][pt];
t->skip_eob_node = (c > 0) && (token_cache[scan[c - 1]] == 0);
+
+#if CONFIG_BALANCED_COEFTREE
+ assert(token <= ZERO_TOKEN ||
+ vp9_coef_encodings[t->token].len - t->skip_eob_node > 0);
+#else
assert(vp9_coef_encodings[t->token].len - t->skip_eob_node > 0);
+#endif
if (!dry_run) {
++counts[type][ref][band][pt][token];
+#if CONFIG_BALANCED_COEFTREE
+ if (!t->skip_eob_node && token > ZERO_TOKEN)
+#else
if (!t->skip_eob_node)
+#endif
++cpi->common.fc.eob_branch_counts[tx_size][type][ref][band][pt];
}
token_cache[scan[c]] = vp9_pt_energy_class[token];