Merge "Balancing coef-tree to reduce bool decodes" into experimental

author: Deb Mukherjee <debargha@google.com> 2013-05-30 08:10:47 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-05-30 08:10:47 -0700
commit: c98bfcfbbb23dc7c8c8641831922877d0d31c67e (patch)
tree: c700b805ddde78db701295cd2b968903f460b30b /vp9/encoder
parent: ecf023f6e483706a4bf352b1dfcd9fb996ca4555 (diff)
parent: b8b3f1a46d756bef73cb4f3cd6990a01cbacaa25 (diff)
download: libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.tar libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.tar.gz libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.tar.bz2 libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.zip
7 files changed, 138 insertions, 14 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 40489d59c..e4db5c1dc 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -473,7 +473,6 @@ static void pack_mb_tokens(vp9_writer* const bc,
     const vp9_prob *pp;
     int v = a->value;
     int n = a->len;
-    int ncount = n;
     vp9_prob probs[ENTROPY_NODES];
 
     if (t == EOSB_TOKEN) {
@@ -489,18 +488,25 @@ static void pack_mb_tokens(vp9_writer* const bc,
     assert(pp != 0);
 
     /* skip one or two nodes */
+#if !CONFIG_BALANCED_COEFTREE
     if (p->skip_eob_node) {
       n -= p->skip_eob_node;
       i = 2 * p->skip_eob_node;
-      ncount -= p->skip_eob_node;
     }
+#endif
 
     do {
       const int bb = (v >> --n) & 1;
+#if CONFIG_BALANCED_COEFTREE
+      if (i == 2 && p->skip_eob_node) {
+        i += 2;
+        assert(bb == 1);
+        continue;
+      }
+#endif
       vp9_write(bc, bb, pp[i >> 1]);
       i = vp9_coef_tree[i + bb];
-      ncount--;
-    } while (n && ncount);
+    } while (n);
 
     if (b->base_val) {
       const int e = p->extra, l = b->len;
@@ -862,8 +868,11 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
         const MB_PREDICTION_MODE A = above_block_mode(m, i, mis);
         const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
                                      left_block_mode(m, i) : DC_PRED;
-        write_kf_bmode(bc, m->bmi[i].as_mode.first,
-                       c->kf_bmode_prob[A][L]);
+        const int bm = m->bmi[i].as_mode.first;
+#ifdef ENTROPY_STATS
+        ++intra_mode_stats[A][L][bm];
+#endif
+        write_kf_bmode(bc, bm, c->kf_bmode_prob[A][L]);
       }
     }
   }
@@ -1066,11 +1075,19 @@ static void build_tree_distribution(vp9_coeff_probs_model *coef_probs,
                                            coef_probs[i][j][k][l],
                                            coef_branch_ct[i][j][k][l],
                                            model_counts, 0);
+#if CONFIG_BALANCED_COEFTREE
+          coef_branch_ct[i][j][k][l][1][1] = eob_branch_ct[i][j][k][l] -
+                                             coef_branch_ct[i][j][k][l][1][0];
+          coef_probs[i][j][k][l][1] =
+              get_binary_prob(coef_branch_ct[i][j][k][l][1][0],
+                              coef_branch_ct[i][j][k][l][1][1]);
+#else
           coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] -
                                              coef_branch_ct[i][j][k][l][0][0];
           coef_probs[i][j][k][l][0] =
               get_binary_prob(coef_branch_ct[i][j][k][l][0][0],
                               coef_branch_ct[i][j][k][l][0][1]);
+#endif
 #ifdef ENTROPY_STATS
           if (!cpi->dummy_packing) {
             for (t = 0; t < MAX_ENTROPY_TOKENS; ++t)
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 0e9b6804c..cf34cc1f7 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -133,7 +133,11 @@ struct macroblock {
 
   unsigned char *active_ptr;
 
+  // note that token_costs is the cost when eob node is skipped
   vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES];
+#if CONFIG_BALANCED_COEFTREE
+  vp9_coeff_count token_costs_noskip[TX_SIZE_MAX_SB][BLOCK_TYPES];
+#endif
 
   int optimize;
 
diff --git a/vp9/encoder/vp9_boolhuff.c b/vp9/encoder/vp9_boolhuff.c
index 0fcb2579f..86143ca57 100644
--- a/vp9/encoder/vp9_boolhuff.c
+++ b/vp9/encoder/vp9_boolhuff.c
@@ -10,6 +10,7 @@
 
 #include <assert.h>
 #include "vp9/encoder/vp9_boolhuff.h"
+#include "vp9/common/vp9_entropy.h"
 
 #if defined(SECTIONBITS_OUTPUT)
 unsigned __int64 Sectionbits[500];
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 755ff21bf..98ea98031 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -215,10 +215,21 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
         band = get_coef_band(band_translate, i + 1);
         pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                        pad, default_eob);
+#if CONFIG_BALANCED_COEFTREE
         rate0 +=
-          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][0].token];
+          mb->token_costs_noskip[tx_size][type][ref][band][pt]
+                                [tokens[next][0].token];
         rate1 +=
-          mb->token_costs[tx_size][type][ref][band][pt][tokens[next][1].token];
+          mb->token_costs_noskip[tx_size][type][ref][band][pt]
+                                [tokens[next][1].token];
+#else
+        rate0 +=
+          mb->token_costs[tx_size][type][ref][band][pt]
+                         [tokens[next][0].token];
+        rate1 +=
+          mb->token_costs[tx_size][type][ref][band][pt]
+                         [tokens[next][1].token];
+#endif
       }
       UPDATE_RD_COST();
       /* And pick the best. */
@@ -266,14 +277,32 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
         if (t0 != DCT_EOB_TOKEN) {
           pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                          pad, default_eob);
+#if CONFIG_BALANCED_COEFTREE
+          if (!x)
+            rate0 += mb->token_costs[tx_size][type][ref][band][pt][
+                tokens[next][0].token];
+          else
+            rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
+                tokens[next][0].token];
+#else
           rate0 += mb->token_costs[tx_size][type][ref][band][pt][
               tokens[next][0].token];
+#endif
         }
         if (t1 != DCT_EOB_TOKEN) {
           pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
                                          pad, default_eob);
+#if CONFIG_BALANCED_COEFTREE
+          if (!x)
+            rate1 += mb->token_costs[tx_size][type][ref][band][pt][
+                tokens[next][1].token];
+          else
+            rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
+                tokens[next][1].token];
+#else
           rate1 += mb->token_costs[tx_size][type][ref][band][pt][
               tokens[next][1].token];
+#endif
         }
       }
 
@@ -326,8 +355,13 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
   error1 = tokens[next][1].error;
   t0 = tokens[next][0].token;
   t1 = tokens[next][1].token;
+#if CONFIG_BALANCED_COEFTREE
+  rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t0];
+  rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t1];
+#else
   rate0 += mb->token_costs[tx_size][type][ref][band][pt][t0];
   rate1 += mb->token_costs[tx_size][type][ref][band][pt][t1];
+#endif
   UPDATE_RD_COST();
   best = rd_cost1 < rd_cost0;
   final_eob = i0 - 1;
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index b484925bb..d9c34fe9b 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -284,7 +284,6 @@ static void setup_features(VP9_COMP *cpi) {
   set_default_lf_deltas(cpi);
 }
 
-
 static void dealloc_compressor_data(VP9_COMP *cpi) {
   // Delete sementation map
   vpx_free(cpi->segmentation_map);
@@ -2935,9 +2934,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
 #endif
 
     // transform / motion compensation build reconstruction frame
-    if (cm->frame_type == KEY_FRAME) {
-      vp9_default_coef_probs(cm);
-    }
 
     vp9_encode_frame(cpi);
 
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 52b4cc39e..87d560d4d 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -108,11 +108,31 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {SPLITMV,   GOLDEN_FRAME, ALTREF_FRAME},
 };
 
+#if CONFIG_BALANCED_COEFTREE
+static void fill_token_costs(vp9_coeff_count *c,
+                             vp9_coeff_count *cnoskip,
+                             vp9_coeff_probs_model *p,
+                             TX_SIZE tx_size) {
+  int i, j, k, l;
+  for (i = 0; i < BLOCK_TYPES; i++)
+    for (j = 0; j < REF_TYPES; j++)
+      for (k = 0; k < COEF_BANDS; k++)
+        for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+          vp9_prob probs[ENTROPY_NODES];
+          vp9_model_to_full_probs(p[i][j][k][l], probs);
+          vp9_cost_tokens((int *)cnoskip[i][j][k][l], probs,
+                          vp9_coef_tree);
+          // Replace the eob node prob with a very small value so that the
+          // cost approximately equals the cost without the eob node
+          probs[1] = 1;
+          vp9_cost_tokens((int *)c[i][j][k][l], probs, vp9_coef_tree);
+        }
+}
+#else
 static void fill_token_costs(vp9_coeff_count *c,
                              vp9_coeff_probs_model *p,
                              TX_SIZE tx_size) {
   int i, j, k, l;
-
   for (i = 0; i < BLOCK_TYPES; i++)
     for (j = 0; j < REF_TYPES; j++)
       for (k = 0; k < COEF_BANDS; k++)
@@ -123,6 +143,7 @@ static void fill_token_costs(vp9_coeff_count *c,
                                vp9_coef_tree);
         }
 }
+#endif
 
 static int rd_iifactor[32] =  { 4, 4, 3, 2, 1, 0, 0, 0,
                                 0, 0, 0, 0, 0, 0, 0, 0,
@@ -213,14 +234,29 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
     }
   }
 
+#if CONFIG_BALANCED_COEFTREE
   fill_token_costs(cpi->mb.token_costs[TX_4X4],
+                   cpi->mb.token_costs_noskip[TX_4X4],
                    cpi->common.fc.coef_probs_4x4, TX_4X4);
   fill_token_costs(cpi->mb.token_costs[TX_8X8],
+                   cpi->mb.token_costs_noskip[TX_8X8],
                    cpi->common.fc.coef_probs_8x8, TX_8X8);
   fill_token_costs(cpi->mb.token_costs[TX_16X16],
+                   cpi->mb.token_costs_noskip[TX_16X16],
                    cpi->common.fc.coef_probs_16x16, TX_16X16);
   fill_token_costs(cpi->mb.token_costs[TX_32X32],
+                   cpi->mb.token_costs_noskip[TX_32X32],
                    cpi->common.fc.coef_probs_32x32, TX_32X32);
+#else
+  fill_token_costs(cpi->mb.token_costs[TX_4X4],
+                   cpi->common.fc.coef_probs_4x4, TX_4X4);
+  fill_token_costs(cpi->mb.token_costs[TX_8X8],
+                   cpi->common.fc.coef_probs_8x8, TX_8X8);
+  fill_token_costs(cpi->mb.token_costs[TX_16X16],
+                   cpi->common.fc.coef_probs_16x16, TX_16X16);
+  fill_token_costs(cpi->mb.token_costs[TX_32X32],
+                   cpi->common.fc.coef_probs_32x32, TX_32X32);
+#endif
 
   for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
     vp9_cost_tokens(cpi->mb.partition_cost[i],
@@ -274,7 +310,13 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
   TX_TYPE tx_type = DCT_DCT;
 
   const int segment_id = xd->mode_info_context->mbmi.segment_id;
+#if CONFIG_BALANCED_COEFTREE
+  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
+      mb->token_costs_noskip[tx_size][type][ref];
+#else
   vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+#endif
+
   int seg_eob, default_eob;
   uint8_t token_cache[1024];
   const uint8_t * band_translate;
@@ -294,8 +336,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
           get_tx_type_4x4(xd, block) : DCT_DCT;
       above_ec = A[0] != 0;
       left_ec = L[0] != 0;
+#if !CONFIG_BALANCED_COEFTREE
       vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
                                  coef_probs);
+#endif
       seg_eob = 16;
       scan = get_scan_4x4(tx_type);
       band_translate = vp9_coefband_trans_4x4;
@@ -310,8 +354,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
       above_ec = (A[0] + A[1]) != 0;
       left_ec = (L[0] + L[1]) != 0;
       scan = get_scan_8x8(tx_type);
+#if !CONFIG_BALANCED_COEFTREE
       vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
                                  coef_probs);
+#endif
       seg_eob = 64;
       band_translate = vp9_coefband_trans_8x8plus;
       break;
@@ -323,8 +369,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
       TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
           get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
       scan = get_scan_16x16(tx_type);
+#if !CONFIG_BALANCED_COEFTREE
       vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
                                  coef_probs);
+#endif
       seg_eob = 256;
       above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
       left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
@@ -333,8 +381,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
     }
     case TX_32X32:
       scan = vp9_default_scan_32x32;
+#if !CONFIG_BALANCED_COEFTREE
       vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
                                  coef_probs);
+#endif
       seg_eob = 1024;
       above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
       left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
@@ -365,18 +415,30 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
       if (c)
         pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
 
+#if CONFIG_BALANCED_COEFTREE
+      if (!c || token_cache[scan[c - 1]])  // do not skip eob
+        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
+      else
+        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
+#else
       cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
-
       if (!c || token_cache[scan[c - 1]])
         cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
+#endif
       token_cache[scan[c]] = vp9_pt_energy_class[t];
     }
     if (c < seg_eob) {
       if (c)
         pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+#if CONFIG_BALANCED_COEFTREE
+      cost += mb->token_costs_noskip[tx_size][type][ref]
+          [get_coef_band(band_translate, c)]
+          [pt][DCT_EOB_TOKEN];
+#else
       cost += mb->token_costs[tx_size][type][ref]
           [get_coef_band(band_translate, c)]
           [pt][DCT_EOB_TOKEN];
+#endif
     }
   }
 
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index eb79de1d9..79f72bb4b 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -224,11 +224,21 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
     t->token = token;
     t->context_tree = coef_probs[type][ref][band][pt];
     t->skip_eob_node = (c > 0) && (token_cache[scan[c - 1]] == 0);
+
+#if CONFIG_BALANCED_COEFTREE
+    assert(token <= ZERO_TOKEN ||
+           vp9_coef_encodings[t->token].len - t->skip_eob_node > 0);
+#else
     assert(vp9_coef_encodings[t->token].len - t->skip_eob_node > 0);
+#endif
 
     if (!dry_run) {
       ++counts[type][ref][band][pt][token];
+#if CONFIG_BALANCED_COEFTREE
+      if (!t->skip_eob_node && token > ZERO_TOKEN)
+#else
       if (!t->skip_eob_node)
+#endif
         ++cpi->common.fc.eob_branch_counts[tx_size][type][ref][band][pt];
     }
     token_cache[scan[c]] = vp9_pt_energy_class[token];
author	Deb Mukherjee <debargha@google.com>	2013-05-30 08:10:47 -0700
committer	Gerrit Code Review <gerrit@gerrit.golo.chromium.org>	2013-05-30 08:10:47 -0700
commit	c98bfcfbbb23dc7c8c8641831922877d0d31c67e (patch)
tree	c700b805ddde78db701295cd2b968903f460b30b /vp9/encoder
parent	ecf023f6e483706a4bf352b1dfcd9fb996ca4555 (diff)
parent	b8b3f1a46d756bef73cb4f3cd6990a01cbacaa25 (diff)
download	libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.tar libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.tar.gz libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.tar.bz2 libvpx-c98bfcfbbb23dc7c8c8641831922877d0d31c67e.zip