summaryrefslogtreecommitdiff
path: root/vp9/encoder/vp9_encodemb.c
diff options
context:
space:
mode:
authorGabriel Marin <gmx@chromium.org>2016-12-13 16:22:48 -0800
committerJames Zern <jzern@google.com>2016-12-19 13:10:04 -0800
commit0549f5aae91b7d2f1265cea7e2edf34fda8f0433 (patch)
tree849dd360d9c7f9de4d8e03f464fa9dd1ae0ddad4 /vp9/encoder/vp9_encodemb.c
parent9b63cb057a73fa1f053dfd665b23fb944a083443 (diff)
downloadlibvpx-0549f5aae91b7d2f1265cea7e2edf34fda8f0433.tar
libvpx-0549f5aae91b7d2f1265cea7e2edf34fda8f0433.tar.gz
libvpx-0549f5aae91b7d2f1265cea7e2edf34fda8f0433.tar.bz2
libvpx-0549f5aae91b7d2f1265cea7e2edf34fda8f0433.zip
Simplify address arithmetic in vp9_optimize_b
Simplify address arithmetic on token_costs to reduce the number of generated instructions that are used for address arithmetic inside routine vp9_optimize_b. It also helps improve instruction scheduling depending on compiler and optimization level. Measured a 9.3% reduction in retired instructions and 5.3% reduction in execution time for this routine with GCC v4.8.4 and optimization flags -O3, and a reduction of up to 11.6% in execution time with other compilers. No change in behavior. TEST=Verified that encoded files match bit for bit, with and without this change. BUG=b/33678225 Change-Id: I6098650fb5cd2aa04e014fe6e68ca20761f3a21f
Diffstat (limited to 'vp9/encoder/vp9_encodemb.c')
-rw-r--r--vp9/encoder/vp9_encodemb.c24
1 files changed, 10 insertions, 14 deletions
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 20ebe6819..c88efa526 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -109,6 +109,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
int64_t error0, error1;
int16_t t0, t1;
EXTRABIT e0;
+ unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
+ mb->token_costs[tx_size][type][ref];
int best, band, pt, i, final_eob;
#if CONFIG_VP9_HIGHBITDEPTH
const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
@@ -148,10 +150,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
if (next < default_eob) {
band = band_translate[i + 1];
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
- rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
- [tokens[next][0].token];
- rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
- [tokens[next][1].token];
+ rate0 += token_costs[band][0][pt][tokens[next][0].token];
+ rate1 += token_costs[band][0][pt][tokens[next][1].token];
}
UPDATE_RD_COST();
/* And pick the best. */
@@ -208,13 +208,11 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
band = band_translate[i + 1];
if (t0 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
- rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
- [tokens[next][0].token];
+ rate0 += token_costs[band][!x][pt][tokens[next][0].token];
}
if (t1 != EOB_TOKEN) {
pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
- rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
- [tokens[next][1].token];
+ rate1 += token_costs[band][!x][pt][tokens[next][1].token];
}
}
@@ -270,13 +268,11 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
t1 = tokens[next][1].token;
/* Update the cost of each path if we're past the EOB token. */
if (t0 != EOB_TOKEN) {
- tokens[next][0].rate +=
- mb->token_costs[tx_size][type][ref][band][1][pt][t0];
+ tokens[next][0].rate += token_costs[band][1][pt][t0];
tokens[next][0].token = ZERO_TOKEN;
}
if (t1 != EOB_TOKEN) {
- tokens[next][1].rate +=
- mb->token_costs[tx_size][type][ref][band][1][pt][t1];
+ tokens[next][1].rate += token_costs[band][1][pt][t1];
tokens[next][1].token = ZERO_TOKEN;
}
tokens[i][0].best_index = tokens[i][1].best_index = 0;
@@ -292,8 +288,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
- rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
- rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
+ rate0 += token_costs[band][0][ctx][t0];
+ rate1 += token_costs[band][0][ctx][t1];
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
final_eob = -1;