Merge "Save pixels instead of coefficients in intra4x4 RD loop."

author: Ronald S. Bultje <rbultje@google.com> 2013-07-26 17:20:58 -0700
committer: Gerrit Code Review <gerrit@gerrit.golo.chromium.org> 2013-07-26 17:20:58 -0700
commit: dcacce6dd9a01bc1fade2c3aed4972662bb94f13 (patch)
tree: 9d137b3da4adc5c55cd78d0f5bed272c84929d41 /vp9
parent: d30c8f41efb0ba8ede9ca46d270b4ad398f192bf (diff)
parent: 7817d3221ff3a33781d05984bd71bebfc3793259 (diff)
download: libvpx-dcacce6dd9a01bc1fade2c3aed4972662bb94f13.tar libvpx-dcacce6dd9a01bc1fade2c3aed4972662bb94f13.tar.gz libvpx-dcacce6dd9a01bc1fade2c3aed4972662bb94f13.tar.bz2 libvpx-dcacce6dd9a01bc1fade2c3aed4972662bb94f13.zip
1 file changed, 14 insertions, 42 deletions
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 9d7033d60..1db4c5f77 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1186,17 +1186,19 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
   struct macroblockd_plane *pd = &xd->plane[0];
   const int src_stride = p->src.stride;
   const int dst_stride = pd->dst.stride;
-  uint8_t *src, *dst;
+  uint8_t *src_init = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib,
+                                                p->src.buf, src_stride);
+  uint8_t *dst_init = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, ib,
+                                                pd->dst.buf, dst_stride);
   int16_t *src_diff, *coeff;
 
   ENTROPY_CONTEXT ta[2], tempa[2];
   ENTROPY_CONTEXT tl[2], templ[2];
   TX_TYPE tx_type = DCT_DCT;
-  TX_TYPE best_tx_type = DCT_DCT;
   int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
   int idx, idy, block;
-  DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
+  uint8_t best_dst[8 * 8];
 
   assert(ib < 4);
 
@@ -1224,17 +1226,15 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
       for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
         int64_t ssz;
         const int16_t *scan;
+        uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
+        uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
 
         block = ib + idy * 2 + idx;
         xd->mode_info_context->bmi[block].as_mode = mode;
-        src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
-                                        p->src.buf, src_stride);
         src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
                                              p->src_diff);
         coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
-        dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
-                                        pd->dst.buf, dst_stride);
-        vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8),
+        vp9_predict_intra_block(xd, block, 1,
                                 TX_4X4, mode,
                                 x->skip_encode ? src : dst,
                                 x->skip_encode ? src_stride : dst_stride,
@@ -1280,19 +1280,11 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
       *bestdistortion = distortion;
       best_rd = this_rd;
       *best_mode = mode;
-      best_tx_type = tx_type;
       vpx_memcpy(a, tempa, sizeof(tempa));
       vpx_memcpy(l, templ, sizeof(templ));
-      // FIXME(rbultje) why are we storing best_dqcoeff instead of the
-      // dst buffer here?
-      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
-        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
-          block = ib + idy * 2 + idx;
-          vpx_memcpy(best_dqcoeff[idy * 2 + idx],
-                     BLOCK_OFFSET(pd->dqcoeff, block, 16),
-                     sizeof(best_dqcoeff[0]));
-        }
-      }
+      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
+        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
+                   num_4x4_blocks_wide * 4);
     }
   next:
     {}
@@ -1301,29 +1293,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
   if (best_rd >= rd_thresh || x->skip_encode)
     return best_rd;
 
-  for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
-    for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
-      block = ib + idy * 2 + idx;
-      xd->mode_info_context->bmi[block].as_mode = *best_mode;
-      src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
-                                      p->src.buf, src_stride);
-      dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
-                                      pd->dst.buf, dst_stride);
-
-      vp9_predict_intra_block(xd, block, b_width_log2(BLOCK_SIZE_SB8X8), TX_4X4,
-                              *best_mode,
-                              x->skip_encode ? src : dst,
-                              x->skip_encode ? src_stride : dst_stride,
-                              dst, dst_stride);
-      // inverse transform
-      if (best_tx_type != DCT_DCT)
-        vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
-                             dst_stride, best_tx_type);
-      else
-        xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
-                           dst_stride);
-    }
-  }
+  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
+    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
+               num_4x4_blocks_wide * 4);
 
   return best_rd;
 }
author	Ronald S. Bultje <rbultje@google.com>	2013-07-26 17:20:58 -0700
committer	Gerrit Code Review <gerrit@gerrit.golo.chromium.org>	2013-07-26 17:20:58 -0700
commit	dcacce6dd9a01bc1fade2c3aed4972662bb94f13 (patch)
tree	9d137b3da4adc5c55cd78d0f5bed272c84929d41 /vp9
parent	d30c8f41efb0ba8ede9ca46d270b4ad398f192bf (diff)
parent	7817d3221ff3a33781d05984bd71bebfc3793259 (diff)
download	libvpx-dcacce6dd9a01bc1fade2c3aed4972662bb94f13.tar libvpx-dcacce6dd9a01bc1fade2c3aed4972662bb94f13.tar.gz libvpx-dcacce6dd9a01bc1fade2c3aed4972662bb94f13.tar.bz2 libvpx-dcacce6dd9a01bc1fade2c3aed4972662bb94f13.zip