author     Jingning Han <jingning@google.com>  2013-05-15 12:19:59 -0700
committer  Jingning Han <jingning@google.com>  2013-05-16 10:41:29 -0700
commit     8e3d0e4d7db867caa110e96fa0fd1ff9ba37cb9f (patch)
tree       4e1ab770f1be852f8c787977fb2f6c06bd615d12 /vp9/encoder
parent     c0f70cca406a2eca0d70476721b94754c2e5e4e2 (diff)
Add building blocks for 4x8/8x4 rd search

These building blocks enable rate-distortion optimization search over
block sizes of 8x4 and 4x8. They still need to be converted into
MMX/SSE forms.

Change-Id: I570ea2d22d14ceec3fe3575128d7dfa172a577de
Diffstat (limited to 'vp9/encoder')
-rw-r--r--  vp9/encoder/vp9_onyx_if.c     12
-rw-r--r--  vp9/encoder/vp9_rdopt.c       97
-rw-r--r--  vp9/encoder/vp9_sad_c.c       45
-rw-r--r--  vp9/encoder/vp9_variance_c.c  88
4 files changed, 193 insertions(+), 49 deletions(-)
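
The "building blocks" named in the commit message are plain C reference
kernels: each new SAD wrapper in vp9_sad_c.c below delegates to the generic
sad_mx_n_c helper with the block's width and height. The helper's body is not
part of this diff; a minimal sketch of what it computes, inferred from the
call sites sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, m, n), would
be:

  #include <stdint.h>
  #include <stdlib.h>

  /* Sum of absolute differences over an m-wide, n-tall block.
     Hedged sketch; the real helper is defined elsewhere in vp9_sad_c.c
     and is not shown in this diff. */
  static unsigned int sad_mx_n_c(const uint8_t *src_ptr, int src_stride,
                                 const uint8_t *ref_ptr, int ref_stride,
                                 int m, int n) {
    unsigned int sad = 0;
    int r, c;

    for (r = 0; r < n; r++) {
      for (c = 0; c < m; c++)
        sad += abs(src_ptr[c] - ref_ptr[c]);
      src_ptr += src_stride;   /* advance one row in each buffer */
      ref_ptr += ref_stride;
    }
    return sad;
  }

With that in hand, vp9_sad8x4_c is sad_mx_n_c(..., 8, 4) and vp9_sad4x8_c is
sad_mx_n_c(..., 4, 8); the x4d variants evaluate the same SAD against four
candidate reference blocks in one call.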
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 67d1b67fc..2d3fea975 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1597,11 +1597,15 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL,
vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
- BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+ BFP(BLOCK_8X4, vp9_sad8x4, vp9_variance8x4, vp9_sub_pixel_variance8x4,
+ vp9_sub_pixel_avg_variance8x4, NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad8x4x4d)
- BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+ BFP(BLOCK_4X8, vp9_sad4x8, vp9_variance4x8, vp9_sub_pixel_variance4x8,
+ vp9_sub_pixel_avg_variance4x8, NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad4x8x4d)
BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 50976642f..f928e7afe 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1096,6 +1096,50 @@ static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
return r;
}
+static enum BlockSize get_block_size(int bw, int bh) {
+ if (bw == 4 && bh == 4)
+ return BLOCK_4X4;
+
+ if (bw == 4 && bh == 8)
+ return BLOCK_4X8;
+
+ if (bw == 8 && bh == 4)
+ return BLOCK_8X4;
+
+ if (bw == 8 && bh == 8)
+ return BLOCK_8X8;
+
+ if (bw == 8 && bh == 16)
+ return BLOCK_8X16;
+
+ if (bw == 16 && bh == 8)
+ return BLOCK_16X8;
+
+ if (bw == 16 && bh == 16)
+ return BLOCK_16X16;
+
+ if (bw == 32 && bh == 32)
+ return BLOCK_32X32;
+
+ if (bw == 32 && bh == 16)
+ return BLOCK_32X16;
+
+ if (bw == 16 && bh == 32)
+ return BLOCK_16X32;
+
+ if (bw == 64 && bh == 32)
+ return BLOCK_64X32;
+
+ if (bw == 32 && bh == 64)
+ return BLOCK_32X64;
+
+ if (bw == 64 && bh == 64)
+ return BLOCK_64X64;
+
+ assert(0);
+ return -1;
+}
+
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi,
int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
@@ -1111,6 +1155,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
int sbr = 0, sbd = 0;
int segmentyrate = 0;
int best_eobs[4] = { 0 };
+#if CONFIG_AB4X4
+ BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
+ int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
+#endif
vp9_variance_fn_ptr_t *v_fn_ptr;
@@ -1120,7 +1168,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
+#if CONFIG_AB4X4
+ v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];
+#else
v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4];
+#endif
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
@@ -1670,51 +1722,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
frame_type, block_size);
}
-
-static enum BlockSize get_block_size(int bw, int bh) {
- if (bw == 4 && bh == 4)
- return BLOCK_4X4;
-
- if (bw == 4 && bh == 8)
- return BLOCK_4X8;
-
- if (bw == 8 && bh == 4)
- return BLOCK_8X4;
-
- if (bw == 8 && bh == 8)
- return BLOCK_8X8;
-
- if (bw == 8 && bh == 16)
- return BLOCK_8X16;
-
- if (bw == 16 && bh == 8)
- return BLOCK_16X8;
-
- if (bw == 16 && bh == 16)
- return BLOCK_16X16;
-
- if (bw == 32 && bh == 32)
- return BLOCK_32X32;
-
- if (bw == 32 && bh == 16)
- return BLOCK_32X16;
-
- if (bw == 16 && bh == 32)
- return BLOCK_16X32;
-
- if (bw == 64 && bh == 32)
- return BLOCK_64X32;
-
- if (bw == 32 && bh == 64)
- return BLOCK_32X64;
-
- if (bw == 64 && bh == 64)
- return BLOCK_64X64;
-
- assert(0);
- return -1;
-}
-
static void model_rd_from_var_lapndz(int var, int n, int qstep,
int *rate, int *dist) {
// This function models the rate and distortion for a Laplacian
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c
index b4cd19358..994828f20 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -156,6 +156,21 @@ unsigned int vp9_sad8x16_c(const uint8_t *src_ptr,
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16);
}
+unsigned int vp9_sad8x4_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 4);
+}
+
+unsigned int vp9_sad4x8_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 8);
+}
unsigned int vp9_sad4x4_c(const uint8_t *src_ptr,
int src_stride,
@@ -563,6 +578,36 @@ void vp9_sad8x16x4d_c(const uint8_t *src_ptr,
ref_ptr[3], ref_stride, 0x7fffffff);
}
+void vp9_sad8x4x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
+void vp9_sad4x8x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t* const ref_ptr[],
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index fa53abdec..e24a46b24 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -820,3 +820,91 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
return vp9_variance8x16_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}
+
+unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 5, 8, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
+
+ return vp9_variance8x4_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse,
+ const uint8_t *second_pred) {
+ uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 5, 8, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
+ comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
+ return vp9_variance8x4_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 9, 4, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
+
+ return vp9_variance4x8_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse,
+ const uint8_t *second_pred) {
+ uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 9, 4, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
+ comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
+ return vp9_variance4x8_c(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
+}
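
All four new sub-pixel variants share the same two-pass structure: a
horizontal 2-tap bilinear pass producing height+1 intermediate rows, a
vertical 2-tap pass over those rows, then the plain variance kernel against
the reference; the avg variants additionally average the filtered block with
a second predictor before measuring variance. That averaging helper,
comp_avg_pred, is not part of this hunk; a hedged sketch inferred from its
call sites comp_avg_pred(comp_pred, pred, width, height, ref, ref_stride):

  #include <stdint.h>

  /* Rounded per-pixel average of the second predictor and the filtered
     block. Sketch only; the real helper lives elsewhere in
     vp9_variance_c.c, and packed width-stride predictions are assumed
     here based on the call sites above. */
  static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred,
                            int width, int height,
                            const uint8_t *ref, int ref_stride) {
    int i, j;

    for (i = 0; i < height; i++) {
      for (j = 0; j < width; j++)
        comp_pred[j] = (pred[j] + ref[j] + 1) >> 1;  /* round to nearest */
      comp_pred += width;   /* comp_pred and pred are packed at width */
      pred += width;
      ref += ref_stride;
    }
  }

For the 8x4 case this is called as comp_avg_pred(temp3, second_pred, 8, 4,
temp2, 8), producing the compound prediction that vp9_variance8x4_c then
compares against the reconstruction buffer.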