Add col/row-based coefficient scanning patterns for 1D 8x8/16x16 ADSTs.

These are mostly just for experimental purposes. I saw small gains (in the 0.1% range) when playing with this on derf.

Change-Id: Ib21eed477bbb46bddcd73b21c5c708a5b46abedc
author: Ronald S. Bultje <rbultje@google.com> 2013-03-25 12:30:00 -0700
committer: Ronald S. Bultje <rbultje@google.com> 2013-03-26 16:46:13 -0700
commit: d9094d8fd381726a9306c7fc32da3103aacc1a53 (patch)
tree: f9155ee33ef6981cfe0083b873e348f3e2f8db0a /vp9/encoder
parent: 3120dbddb12b6299c2d4f2b09c529c6fb2633c47 (diff)
download: libvpx-d9094d8fd381726a9306c7fc32da3103aacc1a53.tar
libvpx-d9094d8fd381726a9306c7fc32da3103aacc1a53.tar.gz
libvpx-d9094d8fd381726a9306c7fc32da3103aacc1a53.tar.bz2
libvpx-d9094d8fd381726a9306c7fc32da3103aacc1a53.zip
7 files changed, 143 insertions, 39 deletions
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 439006156..491ea62b5 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -173,8 +173,8 @@ struct macroblock {
   void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
   void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx);
   void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2);
-  void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx);
-  void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx);
+  void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type);
+  void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type);
 };
 
 #endif  // VP9_ENCODER_VP9_BLOCK_H_
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index 9e5bcea16..eddacb872 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -155,12 +155,12 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
     tx_type = get_tx_type_8x8(xd, ib);
     if (tx_type != DCT_DCT) {
       vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
-      x->quantize_b_8x8(x, idx);
+      x->quantize_b_8x8(x, idx, tx_type);
       vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
                             16, tx_type);
     } else {
       x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
-      x->quantize_b_8x8(x, idx);
+      x->quantize_b_8x8(x, idx, DCT_DCT);
       vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
     }
   } else {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index f51f4c57a..3ad429a9e 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -598,20 +598,40 @@ static void optimize_b(VP9_COMMON *const cm,
       }
       break;
     }
-    case TX_8X8:
-      scan = vp9_default_zig_zag1d_8x8;
+    case TX_8X8: {
+      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+      const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+      const TX_TYPE tx_type = get_tx_type_8x8(xd, y + (x >> 1));
+      if (tx_type == DCT_ADST) {
+        scan = vp9_col_scan_8x8;
+      } else if (tx_type == ADST_DCT) {
+        scan = vp9_row_scan_8x8;
+      } else {
+        scan = vp9_default_zig_zag1d_8x8;
+      }
       default_eob = 64;
 #if CONFIG_CODE_NONZEROCOUNT
       nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type];
 #endif
       break;
-    case TX_16X16:
-      scan = vp9_default_zig_zag1d_16x16;
+    }
+    case TX_16X16: {
+      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+      const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+      const TX_TYPE tx_type = get_tx_type_16x16(xd, y + (x >> 2));
+      if (tx_type == DCT_ADST) {
+        scan = vp9_col_scan_16x16;
+      } else if (tx_type == ADST_DCT) {
+        scan = vp9_row_scan_16x16;
+      } else {
+        scan = vp9_default_zig_zag1d_16x16;
+      }
       default_eob = 256;
 #if CONFIG_CODE_NONZEROCOUNT
       nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type];
 #endif
       break;
+    }
     case TX_32X32:
       scan = vp9_default_zig_zag1d_32x32;
       default_eob = 1024;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 13958c03d..881fce50f 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -196,13 +196,26 @@ void vp9_quantize_mb_4x4(MACROBLOCK *x) {
   vp9_quantize_mbuv_4x4(x);
 }
 
-void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) {
+void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
   MACROBLOCKD *const xd = &mb->e_mbd;
   int16_t *qcoeff_ptr = xd->qcoeff + 16 * b_idx;
   int16_t *dqcoeff_ptr = xd->dqcoeff + 16 * b_idx;
   const int c_idx = plane_idx(xd, b_idx);
   BLOCK *const b = &mb->block[c_idx];
   BLOCKD *const d = &xd->block[c_idx];
+  const int *pt_scan;
+
+  switch (tx_type) {
+    case ADST_DCT:
+      pt_scan = vp9_row_scan_8x8;
+      break;
+    case DCT_ADST:
+      pt_scan = vp9_col_scan_8x8;
+      break;
+    default:
+      pt_scan = vp9_default_zig_zag1d_8x8;
+      break;
+  }
 
   vpx_memset(qcoeff_ptr, 0, 64 * sizeof(int16_t));
   vpx_memset(dqcoeff_ptr, 0, 64 * sizeof(int16_t));
@@ -254,7 +267,7 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx) {
       }
     }
     for (i = 1; i < 64; i++) {
-      rc   = vp9_default_zig_zag1d_8x8[i];
+      rc   = pt_scan[i];
       z    = coeff_ptr[rc];
       zbin = (zbin_ptr[1] + zbin_boost_ptr[zero_run] + zbin_oq_value);
       // The original code was incrementing zero_run while keeping it at
@@ -303,7 +316,8 @@ void vp9_quantize_mby_8x8(MACROBLOCK *x) {
   }
 #endif
   for (i = 0; i < 16; i += 4) {
-    x->quantize_b_8x8(x, i);
+    TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, (i & 8) + ((i & 4) >> 1));
+    x->quantize_b_8x8(x, i, tx_type);
   }
 }
 
@@ -316,7 +330,7 @@ void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
   }
 #endif
   for (i = 16; i < 24; i += 4)
-    x->quantize_b_8x8(x, i);
+    x->quantize_b_8x8(x, i, DCT_DCT);
 }
 
 void vp9_quantize_mb_8x8(MACROBLOCK *x) {
@@ -325,13 +339,14 @@ void vp9_quantize_mb_8x8(MACROBLOCK *x) {
 }
 
 void vp9_quantize_mby_16x16(MACROBLOCK *x) {
+  TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, 0);
 #if CONFIG_CODE_NONZEROCOUNT
   int i;
   for (i = 0; i < 16; i++) {
     x->e_mbd.nzcs[i] = 0;
   }
 #endif
-  x->quantize_b_16x16(x, 0);
+  x->quantize_b_16x16(x, 0, tx_type);
 }
 
 void vp9_quantize_mb_16x16(MACROBLOCK *x) {
@@ -400,11 +415,24 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
 #endif
 }
 
-void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) {
+void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
   MACROBLOCKD *const xd = &mb->e_mbd;
   const int c_idx = plane_idx(xd, b_idx);
   BLOCK *const b = &mb->block[c_idx];
   BLOCKD *const d = &xd->block[c_idx];
+  const int *pt_scan;
+
+  switch (tx_type) {
+    case ADST_DCT:
+      pt_scan = vp9_row_scan_16x16;
+      break;
+    case DCT_ADST:
+      pt_scan = vp9_col_scan_16x16;
+      break;
+    default:
+      pt_scan = vp9_default_zig_zag1d_16x16;
+      break;
+  }
 
   quantize(b->zrun_zbin_boost,
            mb->coeff + 16 * b_idx,
@@ -418,7 +446,7 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx) {
 #if CONFIG_CODE_NONZEROCOUNT
            &xd->nzcs[b_idx],
 #endif
-           vp9_default_zig_zag1d_16x16, 1);
+           pt_scan, 1);
 }
 
 void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) {
@@ -450,15 +478,21 @@ void vp9_quantize_sby_32x32(MACROBLOCK *x) {
 void vp9_quantize_sby_16x16(MACROBLOCK *x) {
   int n;
 
-  for (n = 0; n < 4; n++)
-    x->quantize_b_16x16(x, n * 16);
+  for (n = 0; n < 4; n++) {
+    TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd,
+                                        (16 * (n & 2)) + ((n & 1) * 4));
+    x->quantize_b_16x16(x, n * 16, tx_type);
+  }
 }
 
 void vp9_quantize_sby_8x8(MACROBLOCK *x) {
   int n;
 
-  for (n = 0; n < 16; n++)
-    x->quantize_b_8x8(x, n * 4);
+  for (n = 0; n < 16; n++) {
+    TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd,
+                                      (4 * (n & 12)) + ((n & 3) * 2));
+    x->quantize_b_8x8(x, n * 4, tx_type);
+  }
 }
 
 void vp9_quantize_sby_4x4(MACROBLOCK *x) {
@@ -476,15 +510,15 @@ void vp9_quantize_sby_4x4(MACROBLOCK *x) {
 }
 
 void vp9_quantize_sbuv_16x16(MACROBLOCK *x) {
-  x->quantize_b_16x16(x, 64);
-  x->quantize_b_16x16(x, 80);
+  x->quantize_b_16x16(x, 64, DCT_DCT);
+  x->quantize_b_16x16(x, 80, DCT_DCT);
 }
 
 void vp9_quantize_sbuv_8x8(MACROBLOCK *x) {
   int i;
 
   for (i = 64; i < 96; i += 4)
-    x->quantize_b_8x8(x, i);
+    x->quantize_b_8x8(x, i, DCT_DCT);
 }
 
 void vp9_quantize_sbuv_4x4(MACROBLOCK *x) {
@@ -504,15 +538,21 @@ void vp9_quantize_sb64y_32x32(MACROBLOCK *x) {
 void vp9_quantize_sb64y_16x16(MACROBLOCK *x) {
   int n;
 
-  for (n = 0; n < 16; n++)
-    x->quantize_b_16x16(x, n * 16);
+  for (n = 0; n < 16; n++) {
+    TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd,
+                                        (16 * (n & 12)) + ((n & 3) * 4));
+    x->quantize_b_16x16(x, n * 16, tx_type);
+  }
 }
 
 void vp9_quantize_sb64y_8x8(MACROBLOCK *x) {
   int n;
 
-  for (n = 0; n < 64; n++)
-    x->quantize_b_8x8(x, n * 4);
+  for (n = 0; n < 64; n++) {
+    TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd,
+                                      (4 * (n & 56)) + ((n & 7) * 2));
+    x->quantize_b_8x8(x, n * 4, tx_type);
+  }
 }
 
 void vp9_quantize_sb64y_4x4(MACROBLOCK *x) {
@@ -538,14 +578,14 @@ void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) {
   int i;
 
   for (i = 256; i < 384; i += 16)
-    x->quantize_b_16x16(x, i);
+    x->quantize_b_16x16(x, i, DCT_DCT);
 }
 
 void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) {
   int i;
 
   for (i = 256; i < 384; i += 4)
-    x->quantize_b_8x8(x, i);
+    x->quantize_b_8x8(x, i, DCT_DCT);
 }
 
 void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) {
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 739254025..6ba6cbdd9 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -29,8 +29,8 @@
 void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_ix, TX_TYPE type);
 void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx);
 void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2);
-void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx);
-void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx);
+void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type);
+void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type);
 void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx);
 
 void vp9_quantize_mb_4x4(MACROBLOCK *x);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index f68afabed..8abcd1141 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -488,10 +488,20 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
       }
       break;
     }
-    case TX_8X8:
+    case TX_8X8: {
+      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+      const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+      const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+                              get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
       a_ec = (a[0] + a[1]) != 0;
       l_ec = (l[0] + l[1]) != 0;
-      scan = vp9_default_zig_zag1d_8x8;
+      if (tx_type == ADST_DCT) {
+        scan = vp9_row_scan_8x8;
+      } else if (tx_type == DCT_ADST) {
+        scan = vp9_col_scan_8x8;
+      } else {
+        scan = vp9_default_zig_zag1d_8x8;
+      }
 #if CONFIG_CODE_NONZEROCOUNT
       nzc_cost = mb->nzc_costs_8x8[nzc_context][ref][type];
 #else
@@ -499,8 +509,19 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
 #endif
       seg_eob = 64;
       break;
-    case TX_16X16:
-      scan = vp9_default_zig_zag1d_16x16;
+    }
+    case TX_16X16: {
+      const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+      const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+      const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+                              get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
+      if (tx_type == ADST_DCT) {
+        scan = vp9_row_scan_16x16;
+      } else if (tx_type == DCT_ADST) {
+        scan = vp9_col_scan_16x16;
+      } else {
+        scan = vp9_default_zig_zag1d_16x16;
+      }
 #if CONFIG_CODE_NONZEROCOUNT
       nzc_cost = mb->nzc_costs_16x16[nzc_context][ref][type];
 #else
@@ -515,6 +536,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
         l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
       }
       break;
+    }
     case TX_32X32:
       scan = vp9_default_zig_zag1d_32x32;
 #if CONFIG_CODE_NONZEROCOUNT
@@ -1498,7 +1520,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
         vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
       else
         x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
-      x->quantize_b_8x8(x, idx);
+      x->quantize_b_8x8(x, idx, tx_type);
 
       // compute quantization mse of 8x8 block
       distortion = vp9_block_error_c((x->block + idx)->coeff,
@@ -2503,7 +2525,7 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
       if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
         if (otherrd) {
           x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
-          x->quantize_b_8x8(x, idx);
+          x->quantize_b_8x8(x, idx, DCT_DCT);
           thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
           otherdist += thisdistortion;
           xd->mode_info_context->mbmi.txfm_size = TX_8X8;
@@ -2557,7 +2579,7 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
           }
         }
         x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
-        x->quantize_b_8x8(x, idx);
+        x->quantize_b_8x8(x, idx, DCT_DCT);
         thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
         *distortion += thisdistortion;
         *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index c770a5c59..6e2b8474d 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -181,15 +181,29 @@ static void tokenize_b(VP9_COMP *cpi,
       probs = cpi->common.fc.coef_probs_4x4;
       break;
     }
-    case TX_8X8:
+    case TX_8X8: {
+      const int sz = 3 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+      const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+                              get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
       a_ec = (a[0] + a[1]) != 0;
       l_ec = (l[0] + l[1]) != 0;
       seg_eob = 64;
       scan = vp9_default_zig_zag1d_8x8;
+      if (tx_type != DCT_DCT) {
+        if (tx_type == ADST_DCT) {
+          scan = vp9_row_scan_8x8;
+        } else if (tx_type == DCT_ADST) {
+          scan = vp9_col_scan_8x8;
+        }
+      }
       counts = cpi->coef_counts_8x8;
       probs = cpi->common.fc.coef_probs_8x8;
       break;
-    case TX_16X16:
+    }
+    case TX_16X16: {
+      const int sz = 4 + sb_type, x = ib & ((1 << sz) - 1), y = ib - x;
+      const TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
+                              get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
       if (type != PLANE_TYPE_UV) {
         a_ec = (a[0] + a[1] + a[2] + a[3]) != 0;
         l_ec = (l[0] + l[1] + l[2] + l[3]) != 0;
@@ -199,9 +213,17 @@ static void tokenize_b(VP9_COMP *cpi,
       }
       seg_eob = 256;
       scan = vp9_default_zig_zag1d_16x16;
+      if (tx_type != DCT_DCT) {
+        if (tx_type == ADST_DCT) {
+          scan = vp9_row_scan_16x16;
+        } else if (tx_type == DCT_ADST) {
+          scan = vp9_col_scan_16x16;
+        }
+      }
       counts = cpi->coef_counts_16x16;
       probs = cpi->common.fc.coef_probs_16x16;
       break;
+    }
     case TX_32X32:
       if (type != PLANE_TYPE_UV) {
         a_ec = (a[0] + a[1] + a[2] + a[3] +
author	Ronald S. Bultje <rbultje@google.com>	2013-03-25 12:30:00 -0700
committer	Ronald S. Bultje <rbultje@google.com>	2013-03-26 16:46:13 -0700
commit	d9094d8fd381726a9306c7fc32da3103aacc1a53 (patch)
tree	f9155ee33ef6981cfe0083b873e348f3e2f8db0a /vp9/encoder
parent	3120dbddb12b6299c2d4f2b09c529c6fb2633c47 (diff)
download	libvpx-d9094d8fd381726a9306c7fc32da3103aacc1a53.tar libvpx-d9094d8fd381726a9306c7fc32da3103aacc1a53.tar.gz libvpx-d9094d8fd381726a9306c7fc32da3103aacc1a53.tar.bz2 libvpx-d9094d8fd381726a9306c7fc32da3103aacc1a53.zip