Merge "Using stride (# of elements) instead of pitch (bytes) in fdct4x4."

author: Dmitry Kovalev <dkovalev@google.com> 2013-10-22 13:05:24 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-10-22 13:05:24 -0700
commit: 9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25 (patch)
tree: 6e0865490136e90e5c8de2aae33e7393f471b68b /vp9/encoder
parent: c9af3de1557a6c99fada8b9b608ef14ba8f24a85 (diff)
parent: 190c2b4591039a2acef4964581e0d24d82de0d61 (diff)
download: libvpx-9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25.tar
          libvpx-9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25.tar.gz
          libvpx-9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25.tar.bz2
          libvpx-9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25.zip
5 files changed, 13 insertions, 16 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index 461df635e..550cdee60 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -36,14 +36,13 @@ static void fdct4(const int16_t *input, int16_t *output) {
   output[3] = dct_const_round_shift(temp2);
 }
 
-void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct4x4_c(int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
   // as the first pass results are transposed, we tranpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
-  const int stride = pitch >> 1;
   int pass;
   // We need an intermediate buffer between passes.
   int16_t intermediate[4 * 4];
@@ -586,18 +585,17 @@ void vp9_short_fht8x8_c(int16_t *input, int16_t *output,
 
 /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
    pixel. */
-void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_walsh4x4_c(int16_t *input, int16_t *output, int stride) {
   int i;
   int a1, b1, c1, d1, e1;
   int16_t *ip = input;
   int16_t *op = output;
-  int pitch_short = pitch >> 1;
 
   for (i = 0; i < 4; i++) {
-    a1 = ip[0 * pitch_short];
-    b1 = ip[1 * pitch_short];
-    c1 = ip[2 * pitch_short];
-    d1 = ip[3 * pitch_short];
+    a1 = ip[0 * stride];
+    b1 = ip[1 * stride];
+    c1 = ip[2 * stride];
+    d1 = ip[3 * stride];
 
     a1 += b1;
     d1 = d1 - c1;
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index c3a231702..3358fbbe9 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -402,7 +402,7 @@ void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize,
       xoff = 4 * (block & twmask);
       yoff = 4 * (block >> twl);
       src_diff = p->src_diff + 4 * bw * yoff + xoff;
-      x->fwd_txm4x4(src_diff, coeff, bw * 8);
+      x->fwd_txm4x4(src_diff, coeff, bw * 4);
       vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                      p->quant, p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
@@ -612,7 +612,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
       if (tx_type != DCT_DCT)
         vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
       else
-        x->fwd_txm4x4(src_diff, coeff, bw * 8);
+        x->fwd_txm4x4(src_diff, coeff, bw * 4);
       vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                      p->quant_shift, qcoeff, dqcoeff,
                      pd->dequant, p->zbin_extra, eob, scan, iscan);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index f6b2a2876..a2556f4e8 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -959,9 +959,9 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     sf->optimize_coefficients = 0;
   }
 
-  cpi->mb.fwd_txm4x4    = vp9_short_fdct4x4;
+  cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
   if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
-    cpi->mb.fwd_txm4x4    = vp9_short_walsh4x4;
+    cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
   }
 
   cpi->mb.quantize_b_4x4      = vp9_regular_quantize_b_4x4;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 2fd0fbf94..b6f37e9e1 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1089,7 +1089,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
           vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
           x->quantize_b_4x4(x, block, tx_type, 16);
         } else {
-          x->fwd_txm4x4(src_diff, coeff, 16);
+          x->fwd_txm4x4(src_diff, coeff, 8);
           x->quantize_b_4x4(x, block, tx_type, 16);
         }
 
@@ -1563,7 +1563,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
       k += (idy * 2 + idx);
       coeff = BLOCK_OFFSET(p->coeff, k);
       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
-                    coeff, 16);
+                    coeff, 8);
       x->quantize_b_4x4(x, k, DCT_DCT, 16);
       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
                                         16, &ssz);
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index 88c133bf5..fa60e80eb 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -12,14 +12,13 @@
 #include "vp9/common/vp9_idct.h"  // for cospi constants
 #include "vpx_ports/mem.h"
 
-void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int pitch) {
+void vp9_short_fdct4x4_sse2(int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
   // as the first pass results are transposed, we tranpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
-  const int stride = pitch >> 1;
   int pass;
   // Constants
   //    When we use them, in one case, they are all the same. In all others
author: Dmitry Kovalev <dkovalev@google.com> 2013-10-22 13:05:24 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-10-22 13:05:24 -0700
commit: 9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25 (patch)
tree: 6e0865490136e90e5c8de2aae33e7393f471b68b /vp9/encoder
parent: c9af3de1557a6c99fada8b9b608ef14ba8f24a85 (diff)
parent: 190c2b4591039a2acef4964581e0d24d82de0d61 (diff)
download: libvpx-9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25.tar, libvpx-9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25.tar.gz, libvpx-9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25.tar.bz2, libvpx-9f09618bd4d1b90b0897fa8ddcb0d3c73b3d9b25.zip