summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Julia Robson <juliamrobson@gmail.com> 2016-02-26 14:42:25 -0800
committer Alex Converse <aconverse@google.com> 2016-02-26 14:47:18 -0800
commit 74a679de6f66b6a30d7399ae427ce2b85a967824 (patch)
tree 555959d6c6753bee001fb9a58cc4505a58f67497
parent 1ff2935ebf6c537340861bd590329ba5a49b96e3 (diff)
downloadlibvpx-74a679de6f66b6a30d7399ae427ce2b85a967824.tar
libvpx-74a679de6f66b6a30d7399ae427ce2b85a967824.tar.gz
libvpx-74a679de6f66b6a30d7399ae427ce2b85a967824.tar.bz2
libvpx-74a679de6f66b6a30d7399ae427ce2b85a967824.zip
Port "cost_coeff speed improvements" to vp9.
About a 5% faster overall encode (perf cycles) at speed zero!

Change-Id: Iaf013ba75884415cd824e98349f654ffb1c3ef33
-rw-r--r-- vp9/encoder/vp9_rdopt.c 99
-rw-r--r-- vp9/encoder/vp9_tokenize.c 29
-rw-r--r-- vp9/encoder/vp9_tokenize.h 13
3 files changed, 103 insertions, 38 deletions
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 193c9d33c..508c59663 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -387,47 +387,70 @@ static int cost_coeffs(MACROBLOCK *x,
cost = token_costs[0][0][pt][EOB_TOKEN];
c = 0;
} else {
- int band_left = *band_count++;
-
- // dc token
- int v = qcoeff[0];
- int16_t prev_t;
- EXTRABIT e;
- vp9_get_token_extra(v, &prev_t, &e);
- cost = (*token_costs)[0][pt][prev_t] +
- vp9_get_cost(prev_t, e, cat6_high_cost);
-
- token_cache[0] = vp9_pt_energy_class[prev_t];
- ++token_costs;
-
- // ac tokens
- for (c = 1; c < eob; c++) {
- const int rc = scan[c];
- int16_t t;
-
- v = qcoeff[rc];
- vp9_get_token_extra(v, &t, &e);
- if (use_fast_coef_costing) {
- cost += (*token_costs)[!prev_t][!prev_t][t] +
- vp9_get_cost(t, e, cat6_high_cost);
- } else {
- pt = get_coef_context(nb, token_cache, c);
- cost += (*token_costs)[!prev_t][pt][t] +
- vp9_get_cost(t, e, cat6_high_cost);
- token_cache[rc] = vp9_pt_energy_class[t];
- }
- prev_t = t;
- if (!--band_left) {
- band_left = *band_count++;
- ++token_costs;
+ if (use_fast_coef_costing) {
+ int band_left = *band_count++;
+
+ // dc token
+ int v = qcoeff[0];
+ int16_t prev_t;
+ cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost);
+ cost += (*token_costs)[0][pt][prev_t];
+
+ token_cache[0] = vp9_pt_energy_class[prev_t];
+ ++token_costs;
+
+ // ac tokens
+ for (c = 1; c < eob; c++) {
+ const int rc = scan[c];
+ int16_t t;
+
+ v = qcoeff[rc];
+ cost += vp9_get_token_cost(v, &t, cat6_high_cost);
+ cost += (*token_costs)[!prev_t][!prev_t][t];
+ prev_t = t;
+ if (!--band_left) {
+ band_left = *band_count++;
+ ++token_costs;
+ }
}
- }
- // eob token
- if (band_left) {
- if (use_fast_coef_costing) {
+ // eob token
+ if (band_left)
cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
- } else {
+
+ } else { // !use_fast_coef_costing
+ int band_left = *band_count++;
+
+ // dc token
+ int v = qcoeff[0];
+ int16_t tok;
+ unsigned int (*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
+ cost = vp9_get_token_cost(v, &tok, cat6_high_cost);
+ cost += (*token_costs)[0][pt][tok];
+
+ token_cache[0] = vp9_pt_energy_class[tok];
+ ++token_costs;
+
+ tok_cost_ptr = &((*token_costs)[!tok]);
+
+ // ac tokens
+ for (c = 1; c < eob; c++) {
+ const int rc = scan[c];
+
+ v = qcoeff[rc];
+ cost += vp9_get_token_cost(v, &tok, cat6_high_cost);
+ pt = get_coef_context(nb, token_cache, c);
+ cost += (*tok_cost_ptr)[pt][tok];
+ token_cache[rc] = vp9_pt_energy_class[tok];
+ if (!--band_left) {
+ band_left = *band_count++;
+ ++token_costs;
+ }
+ tok_cost_ptr = &((*token_costs)[!tok]);
+ }
+
+ // eob token
+ if (band_left) {
pt = get_coef_context(nb, token_cache, c);
cost += (*token_costs)[0][pt][EOB_TOKEN];
}
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 93be6d7ae..ee1d08adc 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -50,6 +50,35 @@ static const TOKENVALUE dct_cat_lt_10_value_tokens[] = {
const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens +
(sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens))
/ 2;
+// The corresponding costs of the extrabits for the tokens in the above table
+// are stored in the table below. The values are obtained from looking up the
+// entry for the specified extrabits in the table corresponding to the token
+// (as defined in cost element vp9_extra_bits)
+// e.g. {9, 63} maps to cat5_cost[63 >> 1], {1, 1} maps to sign_cost[1 >> 1]
+static const int dct_cat_lt_10_value_cost[] = {  // Extra-bit costs, one entry per coefficient value; read through the centred pointer defined after the table.
+ 3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531,
+ 3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190,
+ 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894,
+ 2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553,
+ 3197, 3116, 3058, 2977, 2881, 2800,
+ 2742, 2661, 2615, 2534, 2476, 2395,
+ 2299, 2218, 2160, 2079,
+ 2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652,
+ 1893, 1696, 1453, 1256, 1229, 864,
+ 512, 512, 512, 512, 0,  // The trailing 0 is the middle element of the array, i.e. the entry the centred pointer yields for index 0.
+ 512, 512, 512, 512,
+ 864, 1229, 1256, 1453, 1696, 1893,
+ 1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566,
+ 2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615,
+ 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197,
+ 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795,
+ 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136,
+ 3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432,
+ 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773,
+};
+const int *vp9_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost +  // Exported pointer to the middle of the table, so it can be indexed with
+ (sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost))  // negative as well as positive values (element count ...
+ / 2;  // ... halved gives the offset of the centre entry).
// Array indices are identical to previously-existing CONTEXT_NODE indices
const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = {
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index df979b25d..fad798886 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -74,6 +74,7 @@ extern const int16_t *vp9_dct_value_cost_ptr;
*/
extern const TOKENVALUE *vp9_dct_value_tokens_ptr;
extern const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens;
+extern const int *vp9_dct_cat_lt_10_value_cost;
extern const int16_t vp9_cat6_low_cost[256];
extern const int vp9_cat6_high_cost[64];
extern const int vp9_cat6_high10_high_cost[256];
@@ -117,6 +118,18 @@ static INLINE int16_t vp9_get_token(int v) {
return vp9_dct_cat_lt_10_value_tokens[v].token;
}
+static INLINE int vp9_get_token_cost(int v, int16_t *token,  // Writes the token for value v to *token and returns the looked-up cost for v.
+ const int *cat6_high_table) {  // cat6_high_table: caller-chosen table used only on the CATEGORY6 path below.
+ if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) {  // Magnitude of v reaches CAT6_MIN_VAL: handle as CATEGORY6.
+ EXTRABIT extrabits;
+ *token = CATEGORY6_TOKEN;
+ extrabits = abs(v) - CAT6_MIN_VAL;  // Offset of |v| above CAT6_MIN_VAL; the sign of v is not part of this value.
+ return vp9_cat6_low_cost[extrabits & 0xff] +  // Cost is the sum of two lookups: low 8 bits of the offset here ...
+ cat6_high_table[extrabits >> 8];  // ... and the bits above the low 8 in the caller-supplied table.
+ }
+ *token = vp9_dct_cat_lt_10_value_tokens[v].token;  // Otherwise v itself (possibly negative) indexes the token table ...
+ return vp9_dct_cat_lt_10_value_cost[v];  // ... and the matching cost table.
+}
#ifdef __cplusplus
} // extern "C"