author     Jingning Han <jingning@google.com>  2013-05-15 12:19:59 -0700
committer  Jingning Han <jingning@google.com>  2013-05-16 10:41:29 -0700
commit     8e3d0e4d7db867caa110e96fa0fd1ff9ba37cb9f (patch)
tree       4e1ab770f1be852f8c787977fb2f6c06bd615d12 /vp9/encoder
parent     c0f70cca406a2eca0d70476721b94754c2e5e4e2 (diff)
Add building blocks for 4x8/8x4 rd search

These building blocks enable rate-distortion optimization search over
block sizes of 8x4 and 4x8. They still need to be converted into
MMX/SSE forms.

Change-Id: I570ea2d22d14ceec3fe3575128d7dfa172a577de
Diffstat (limited to 'vp9/encoder')
-rw-r--r--  vp9/encoder/vp9_onyx_if.c     12
-rw-r--r--  vp9/encoder/vp9_rdopt.c       97
-rw-r--r--  vp9/encoder/vp9_sad_c.c       45
-rw-r--r--  vp9/encoder/vp9_variance_c.c  88
4 files changed, 193 insertions(+), 49 deletions(-)
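
The "building blocks" named in the commit message are plain C reference
kernels: each new SAD wrapper in vp9_sad_c.c below delegates to the generic
sad_mx_n_c helper with the block's width and height. The helper's body is not
part of this diff; a minimal sketch of what it computes, inferred from the
call sites sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, m, n), would
be:

  #include <stdint.h>
  #include <stdlib.h>

  /* Sum of absolute differences over an m-wide, n-tall block.
     Hedged sketch; the real helper is defined elsewhere in vp9_sad_c.c
     and is not shown in this diff. */
  static unsigned int sad_mx_n_c(const uint8_t *src_ptr, int src_stride,
                                 const uint8_t *ref_ptr, int ref_stride,
                                 int m, int n) {
    unsigned int sad = 0;
    int r, c;

    for (r = 0; r < n; r++) {
      for (c = 0; c < m; c++)
        sad += abs(src_ptr[c] - ref_ptr[c]);
      src_ptr += src_stride;   /* advance one row in each buffer */
      ref_ptr += ref_stride;
    }
    return sad;
  }

With that in hand, vp9_sad8x4_c is sad_mx_n_c(..., 8, 4) and vp9_sad4x8_c is
sad_mx_n_c(..., 4, 8); the x4d variants evaluate the same SAD against four
candidate reference blocks in one call.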
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 67d1b67fc..2d3fea975 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -1597,11 +1597,15 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL,
vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
- BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+ BFP(BLOCK_8X4, vp9_sad8x4, vp9_variance8x4, vp9_sub_pixel_variance8x4,
+ vp9_sub_pixel_avg_variance8x4, NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad8x4x4d)
- BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+ BFP(BLOCK_4X8, vp9_sad4x8, vp9_variance4x8, vp9_sub_pixel_variance4x8,
+ vp9_sub_pixel_avg_variance4x8, NULL, NULL,
+ NULL, NULL, NULL,
+ vp9_sad4x8x4d)
BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL,
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 50976642f..f928e7afe 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1096,6 +1096,50 @@ static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
return r;
}
+static enum BlockSize get_block_size(int bw, int bh) {
+ if (bw == 4 && bh == 4)
+ return BLOCK_4X4;
+
+ if (bw == 4 && bh == 8)
+ return BLOCK_4X8;
+
+ if (bw == 8 && bh == 4)
+ return BLOCK_8X4;
+
+ if (bw == 8 && bh == 8)
+ return BLOCK_8X8;
+
+ if (bw == 8 && bh == 16)
+ return BLOCK_8X16;
+
+ if (bw == 16 && bh == 8)
+ return BLOCK_16X8;
+
+ if (bw == 16 && bh == 16)
+ return BLOCK_16X16;
+
+ if (bw == 32 && bh == 32)
+ return BLOCK_32X32;
+
+ if (bw == 32 && bh == 16)
+ return BLOCK_32X16;
+
+ if (bw == 16 && bh == 32)
+ return BLOCK_16X32;
+
+ if (bw == 64 && bh == 32)
+ return BLOCK_64X32;
+
+ if (bw == 32 && bh == 64)
+ return BLOCK_32X64;
+
+ if (bw == 64 && bh == 64)
+ return BLOCK_64X64;
+
+ assert(0);
+ return -1;
+}
+
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi,
int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
@@ -1111,6 +1155,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
int sbr = 0, sbd = 0;
int segmentyrate = 0;
int best_eobs[4] = { 0 };
+#if CONFIG_AB4X4
+ BLOCK_SIZE_TYPE bsize = mbmi->sb_type;
+ int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
+#endif
vp9_variance_fn_ptr_t *v_fn_ptr;
@@ -1120,7 +1168,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
+#if CONFIG_AB4X4
+ v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];
+#else
v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4];
+#endif
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
@@ -1670,51 +1722,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
frame_type, block_size);
}
-
-static enum BlockSize get_block_size(int bw, int bh) {
- if (bw == 4 && bh == 4)
- return BLOCK_4X4;
-
- if (bw == 4 && bh == 8)
- return BLOCK_4X8;
-
- if (bw == 8 && bh == 4)
- return BLOCK_8X4;
-
- if (bw == 8 && bh == 8)
- return BLOCK_8X8;
-
- if (bw == 8 && bh == 16)
- return BLOCK_8X16;
-
- if (bw == 16 && bh == 8)
- return BLOCK_16X8;
-
- if (bw == 16 && bh == 16)
- return BLOCK_16X16;
-
- if (bw == 32 && bh == 32)
- return BLOCK_32X32;
-
- if (bw == 32 && bh == 16)
- return BLOCK_32X16;
-
- if (bw == 16 && bh == 32)
- return BLOCK_16X32;
-
- if (bw == 64 && bh == 32)
- return BLOCK_64X32;
-
- if (bw == 32 && bh == 64)
- return BLOCK_32X64;
-
- if (bw == 64 && bh == 64)
- return BLOCK_64X64;
-
- assert(0);
- return -1;
-}
-
static void model_rd_from_var_lapndz(int var, int n, int qstep,
int *rate, int *dist) {
// This function models the rate and distortion for a Laplacian
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c
index b4cd19358..994828f20 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -156,6 +156,21 @@ unsigned int vp9_sad8x16_c(const uint8_t *src_ptr,
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16);
}
+unsigned int vp9_sad8x4_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 4);
+}
+
+unsigned int vp9_sad4x8_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ unsigned int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 8);
+}
unsigned int vp9_sad4x4_c(const uint8_t *src_ptr,
int src_stride,
@@ -563,6 +578,36 @@ void vp9_sad8x16x4d_c(const uint8_t *src_ptr,
ref_ptr[3], ref_stride, 0x7fffffff);
}
+void vp9_sad8x4x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
+void vp9_sad4x8x4d_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t* const ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array) {
+ sad_array[0] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t* const ref_ptr[],
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index fa53abdec..e24a46b24 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -820,3 +820,91 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
return vp9_variance8x16_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}
+
+unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 5, 8, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
+
+ return vp9_variance8x4_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse,
+ const uint8_t *second_pred) {
+ uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 5, 8, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
+ comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
+ return vp9_variance8x4_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 9, 4, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
+
+ return vp9_variance4x8_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
+}
+
+unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const uint8_t *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse,
+ const uint8_t *second_pred) {
+ uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
+ uint8_t temp2[20 * 16];
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer
+ const int16_t *hfilter, *vfilter;
+
+ hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
+ vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);
+
+ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
+ 1, 9, 4, hfilter);
+ var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
+ comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
+ return vp9_variance4x8_c(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
+}
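
All four new sub-pixel variants share the same two-pass structure: a
horizontal 2-tap bilinear pass producing height+1 intermediate rows, a
vertical 2-tap pass over those rows, then the plain variance kernel against
the reference; the avg variants additionally average the filtered block with
a second predictor before measuring variance. That averaging helper,
comp_avg_pred, is not part of this hunk; a hedged sketch inferred from its
call sites comp_avg_pred(comp_pred, pred, width, height, ref, ref_stride):

  #include <stdint.h>

  /* Rounded per-pixel average of the second predictor and the filtered
     block. Sketch only; the real helper lives elsewhere in
     vp9_variance_c.c, and packed width-stride predictions are assumed
     here based on the call sites above. */
  static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred,
                            int width, int height,
                            const uint8_t *ref, int ref_stride) {
    int i, j;

    for (i = 0; i < height; i++) {
      for (j = 0; j < width; j++)
        comp_pred[j] = (pred[j] + ref[j] + 1) >> 1;  /* round to nearest */
      comp_pred += width;   /* comp_pred and pred are packed at width */
      pred += width;
      ref += ref_stride;
    }
  }

For the 8x4 case this is called as comp_avg_pred(temp3, second_pred, 8, 4,
temp2, 8), producing the compound prediction that vp9_variance8x4_c then
compares against the reconstruction buffer.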