diff options
author | Gabriel Marin <gmx@chromium.org> | 2016-12-13 16:22:48 -0800 |
---|---|---|
committer | James Zern <jzern@google.com> | 2016-12-19 13:10:04 -0800 |
commit | 0549f5aae91b7d2f1265cea7e2edf34fda8f0433 (patch) | |
tree | 849dd360d9c7f9de4d8e03f464fa9dd1ae0ddad4 | |
parent | 9b63cb057a73fa1f053dfd665b23fb944a083443 (diff) | |
download | libvpx-0549f5aae91b7d2f1265cea7e2edf34fda8f0433.tar libvpx-0549f5aae91b7d2f1265cea7e2edf34fda8f0433.tar.gz libvpx-0549f5aae91b7d2f1265cea7e2edf34fda8f0433.tar.bz2 libvpx-0549f5aae91b7d2f1265cea7e2edf34fda8f0433.zip |
Simplify address arithmetic in vp9_optimize_b
Simplify address arithmetic on token_costs to reduce the number of instructions
generated for address computation inside the routine vp9_optimize_b. Depending
on the compiler and optimization level, it also helps improve instruction
scheduling.
Measured a 9.3% reduction in retired instructions and 5.3% reduction in
execution time for this routine with GCC v4.8.4 and optimization flags -O3,
and a reduction of up to 11.6% in execution time with other compilers.
No change in behavior.
TEST=Verified that encoded files match bit for bit, with and without this
change.
BUG=b/33678225
Change-Id: I6098650fb5cd2aa04e014fe6e68ca20761f3a21f
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 24 |
1 file changed, 10 insertions, 14 deletions
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 20ebe6819..c88efa526 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -109,6 +109,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
   int64_t error0, error1;
   int16_t t0, t1;
   EXTRABIT e0;
+  unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
+      mb->token_costs[tx_size][type][ref];
   int best, band, pt, i, final_eob;
 #if CONFIG_VP9_HIGHBITDEPTH
   const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
@@ -148,10 +150,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
       if (next < default_eob) {
         band = band_translate[i + 1];
         pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
-        rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
-                                [tokens[next][0].token];
-        rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
-                                [tokens[next][1].token];
+        rate0 += token_costs[band][0][pt][tokens[next][0].token];
+        rate1 += token_costs[band][0][pt][tokens[next][1].token];
       }
       UPDATE_RD_COST();
       /* And pick the best. */
@@ -208,13 +208,11 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
         band = band_translate[i + 1];
         if (t0 != EOB_TOKEN) {
           pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
-          rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
-                                  [tokens[next][0].token];
+          rate0 += token_costs[band][!x][pt][tokens[next][0].token];
         }
         if (t1 != EOB_TOKEN) {
           pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
-          rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
-                                  [tokens[next][1].token];
+          rate1 += token_costs[band][!x][pt][tokens[next][1].token];
         }
       }
 
@@ -270,13 +268,11 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
       t1 = tokens[next][1].token;
       /* Update the cost of each path if we're past the EOB token. */
       if (t0 != EOB_TOKEN) {
-        tokens[next][0].rate +=
-            mb->token_costs[tx_size][type][ref][band][1][pt][t0];
+        tokens[next][0].rate += token_costs[band][1][pt][t0];
         tokens[next][0].token = ZERO_TOKEN;
       }
       if (t1 != EOB_TOKEN) {
-        tokens[next][1].rate +=
-            mb->token_costs[tx_size][type][ref][band][1][pt][t1];
+        tokens[next][1].rate += token_costs[band][1][pt][t1];
         tokens[next][1].token = ZERO_TOKEN;
       }
       tokens[i][0].best_index = tokens[i][1].best_index = 0;
@@ -292,8 +288,8 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
   error1 = tokens[next][1].error;
   t0 = tokens[next][0].token;
   t1 = tokens[next][1].token;
-  rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
-  rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
+  rate0 += token_costs[band][0][ctx][t0];
+  rate1 += token_costs[band][0][ctx][t1];
   UPDATE_RD_COST();
   best = rd_cost1 < rd_cost0;
   final_eob = -1;