diff options
author | Jingning Han <jingning@google.com> | 2013-05-15 12:19:59 -0700 |
---|---|---|
committer | Jingning Han <jingning@google.com> | 2013-05-16 10:41:29 -0700 |
commit | 8e3d0e4d7db867caa110e96fa0fd1ff9ba37cb9f (patch) | |
tree | 4e1ab770f1be852f8c787977fb2f6c06bd615d12 /vp9/encoder | |
parent | c0f70cca406a2eca0d70476721b94754c2e5e4e2 (diff) | |
download | libvpx-8e3d0e4d7db867caa110e96fa0fd1ff9ba37cb9f.tar libvpx-8e3d0e4d7db867caa110e96fa0fd1ff9ba37cb9f.tar.gz libvpx-8e3d0e4d7db867caa110e96fa0fd1ff9ba37cb9f.tar.bz2 libvpx-8e3d0e4d7db867caa110e96fa0fd1ff9ba37cb9f.zip |
Add building blocks for 4x8/8x4 rd search
These building blocks enable rate-distortion optimization search
over block sizes of 8x4 and 4x8. They still need to be converted into
MMX/SSE forms.
Change-Id: I570ea2d22d14ceec3fe3575128d7dfa172a577de
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 97 | ||||
-rw-r--r-- | vp9/encoder/vp9_sad_c.c | 45 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance_c.c | 88 |
4 files changed, 193 insertions, 49 deletions
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 67d1b67fc..2d3fea975 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1597,11 +1597,15 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_sub_pixel_avg_variance8x8, NULL, NULL, NULL, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d) - BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL) + BFP(BLOCK_8X4, vp9_sad8x4, vp9_variance8x4, vp9_sub_pixel_variance8x4, + vp9_sub_pixel_avg_variance8x4, NULL, NULL, + NULL, NULL, NULL, + vp9_sad8x4x4d) - BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL) + BFP(BLOCK_4X8, vp9_sad4x8, vp9_variance4x8, vp9_sub_pixel_variance4x8, + vp9_sub_pixel_avg_variance4x8, NULL, NULL, + NULL, NULL, NULL, + vp9_sad4x8x4d) BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4, vp9_sub_pixel_avg_variance4x4, NULL, NULL, NULL, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 50976642f..f928e7afe 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1096,6 +1096,50 @@ static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { return r; } +static enum BlockSize get_block_size(int bw, int bh) { + if (bw == 4 && bh == 4) + return BLOCK_4X4; + + if (bw == 4 && bh == 8) + return BLOCK_4X8; + + if (bw == 8 && bh == 4) + return BLOCK_8X4; + + if (bw == 8 && bh == 8) + return BLOCK_8X8; + + if (bw == 8 && bh == 16) + return BLOCK_8X16; + + if (bw == 16 && bh == 8) + return BLOCK_16X8; + + if (bw == 16 && bh == 16) + return BLOCK_16X16; + + if (bw == 32 && bh == 32) + return BLOCK_32X32; + + if (bw == 32 && bh == 16) + return BLOCK_32X16; + + if (bw == 16 && bh == 32) + return BLOCK_16X32; + + if (bw == 64 && bh == 32) + return BLOCK_64X32; + + if (bw == 32 && bh == 64) + return BLOCK_32X64; + + if (bw == 64 && bh == 64) + return BLOCK_64X64; + + assert(0); + return -1; +} + static void rd_check_segment_txsize(VP9_COMP *cpi, 
MACROBLOCK *x, BEST_SEG_INFO *bsi, int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { @@ -1111,6 +1155,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, int sbr = 0, sbd = 0; int segmentyrate = 0; int best_eobs[4] = { 0 }; +#if CONFIG_AB4X4 + BLOCK_SIZE_TYPE bsize = mbmi->sb_type; + int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); +#endif vp9_variance_fn_ptr_t *v_fn_ptr; @@ -1120,7 +1168,11 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); +#if CONFIG_AB4X4 + v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)]; +#else v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4]; +#endif // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on @@ -1670,51 +1722,6 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, frame_type, block_size); } - -static enum BlockSize get_block_size(int bw, int bh) { - if (bw == 4 && bh == 4) - return BLOCK_4X4; - - if (bw == 4 && bh == 8) - return BLOCK_4X8; - - if (bw == 8 && bh == 4) - return BLOCK_8X4; - - if (bw == 8 && bh == 8) - return BLOCK_8X8; - - if (bw == 8 && bh == 16) - return BLOCK_8X16; - - if (bw == 16 && bh == 8) - return BLOCK_16X8; - - if (bw == 16 && bh == 16) - return BLOCK_16X16; - - if (bw == 32 && bh == 32) - return BLOCK_32X32; - - if (bw == 32 && bh == 16) - return BLOCK_32X16; - - if (bw == 16 && bh == 32) - return BLOCK_16X32; - - if (bw == 64 && bh == 32) - return BLOCK_64X32; - - if (bw == 32 && bh == 64) - return BLOCK_32X64; - - if (bw == 64 && bh == 64) - return BLOCK_64X64; - - assert(0); - return -1; -} - static void model_rd_from_var_lapndz(int var, int n, int qstep, int *rate, int *dist) { // This function models the rate and distortion for a Laplacian diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c index b4cd19358..994828f20 100644 --- a/vp9/encoder/vp9_sad_c.c +++ 
b/vp9/encoder/vp9_sad_c.c @@ -156,6 +156,21 @@ unsigned int vp9_sad8x16_c(const uint8_t *src_ptr, return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 16); } +unsigned int vp9_sad8x4_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 8, 4); +} + +unsigned int vp9_sad4x8_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t *ref_ptr, + int ref_stride, + unsigned int max_sad) { + return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 8); +} unsigned int vp9_sad4x4_c(const uint8_t *src_ptr, int src_stride, @@ -563,6 +578,36 @@ void vp9_sad8x16x4d_c(const uint8_t *src_ptr, ref_ptr[3], ref_stride, 0x7fffffff); } +void vp9_sad8x4x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad8x4(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + +void vp9_sad4x8x4d_c(const uint8_t *src_ptr, + int src_stride, + const uint8_t* const ref_ptr[], + int ref_stride, + unsigned int *sad_array) { + sad_array[0] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr[0], ref_stride, 0x7fffffff); + sad_array[1] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr[1], ref_stride, 0x7fffffff); + sad_array[2] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr[2], ref_stride, 0x7fffffff); + sad_array[3] = vp9_sad4x8(src_ptr, src_stride, + ref_ptr[3], ref_stride, 0x7fffffff); +} + void vp9_sad4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_ptr[], diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index fa53abdec..e24a46b24 100644 --- 
a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -820,3 +820,91 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); return vp9_variance8x16_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } + +unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + uint16_t fdata3[8 * 5]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 5, 8, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); + + return vp9_variance8x4_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[8 * 5]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 5, 8, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); + comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); + return vp9_variance8x4_c(temp3, 8, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) { + 
uint16_t fdata3[5 * 8]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 4, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); + + return vp9_variance4x8_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse); +} + +unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + const uint8_t *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse, + const uint8_t *second_pred) { + uint16_t fdata3[5 * 8]; // Temp data bufffer used in filtering + uint8_t temp2[20 * 16]; + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer + const int16_t *hfilter, *vfilter; + + hfilter = VP9_BILINEAR_FILTERS_2TAP(xoffset); + vfilter = VP9_BILINEAR_FILTERS_2TAP(yoffset); + + var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, + 1, 17, 4, hfilter); + var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); + comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); + return vp9_variance4x8_c(temp3, 4, dst_ptr, dst_pixels_per_line, sse); +} |