diff options
author | Angie Chiang <angiebird@google.com> | 2019-07-15 18:40:10 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2019-07-15 18:40:10 +0000 |
commit | d749bc7b33230f128baec7ecfa19c0d3e1607956 (patch) | |
tree | eb2ee0c7223ce919ae1801026971b6ca412d8e61 | |
parent | bb407a27b2e32f89f0e9eeee2bcd0aa9d5cfea3f (diff) | |
parent | 037d67f684683ffad22e38ab9a6381ccfedd813f (diff) | |
download | libvpx-d749bc7b33230f128baec7ecfa19c0d3e1607956.tar libvpx-d749bc7b33230f128baec7ecfa19c0d3e1607956.tar.gz libvpx-d749bc7b33230f128baec7ecfa19c0d3e1607956.tar.bz2 libvpx-d749bc7b33230f128baec7ecfa19c0d3e1607956.zip |
Merge changes I9288c88d,Ib1ac6f57,I02fac56a,Id6a8b117
* changes:
  Use sdx8f in exhaustive_mesh_search_single_step
  Sync the behavior of exhaustive_mesh_search
  Refactor exhaustive_mesh_search_new
  Simplify code in exhaustive_mesh_search_new
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 51 |
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 193 |
-rw-r--r-- | vpx_dsp/variance.h | 1 |
3 files changed, 150 insertions, 95 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index ec7baf0a7..dd0d10d53 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1533,13 +1533,15 @@ static void set_rc_buffer_sizes(RATE_CONTROL *rc, } #if CONFIG_VP9_HIGHBITDEPTH +// TODO(angiebird): make sdx8f available for highbitdepth if needed #define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].sdaf = SDAF; \ cpi->fn_ptr[BT].vf = VF; \ cpi->fn_ptr[BT].svf = SVF; \ cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; + cpi->fn_ptr[BT].sdx4df = SDX4DF; \ + cpi->fn_ptr[BT].sdx8f = NULL; #define MAKE_BFP_SAD_WRAPPER(fnname) \ static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ @@ -2438,62 +2440,67 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff))); cpi->source_var_thresh = 0; cpi->frames_till_next_var_check = 0; +#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \ + cpi->fn_ptr[BT].sdf = SDF; \ + cpi->fn_ptr[BT].sdaf = SDAF; \ + cpi->fn_ptr[BT].vf = VF; \ + cpi->fn_ptr[BT].svf = SVF; \ + cpi->fn_ptr[BT].svaf = SVAF; \ + cpi->fn_ptr[BT].sdx4df = SDX4DF; \ + cpi->fn_ptr[BT].sdx8f = SDX8F; -#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].sdaf = SDAF; \ - cpi->fn_ptr[BT].vf = VF; \ - cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; - + // TODO(angiebird): make sdx8f available for every block size BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16, vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16, - vpx_sad32x16x4d) + vpx_sad32x16x4d, NULL) BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32, vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32, - vpx_sad16x32x4d) + vpx_sad16x32x4d, NULL) BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32, 
vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32, - vpx_sad64x32x4d) + vpx_sad64x32x4d, NULL) BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64, vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64, - vpx_sad32x64x4d) + vpx_sad32x64x4d, NULL) BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32, vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32, - vpx_sad32x32x4d) + vpx_sad32x32x4d, NULL) BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64, vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64, - vpx_sad64x64x4d) + vpx_sad64x64x4d, NULL) BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16, vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16, - vpx_sad16x16x4d) + vpx_sad16x16x4d, vpx_sad16x16x8) BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8, vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8, - vpx_sad16x8x4d) + vpx_sad16x8x4d, vpx_sad16x8x8) BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16, vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16, - vpx_sad8x16x4d) + vpx_sad8x16x4d, vpx_sad8x16x8) BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8, - vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d) + vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d, + vpx_sad8x8x8) BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4, - vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d) + vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d, + NULL) BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8, - vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d) + vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d, + NULL) BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4, - vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d) + vpx_sub_pixel_variance4x4, 
vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d, + vpx_sad4x4x8) #if CONFIG_VP9_HIGHBITDEPTH highbd_set_var_fns(cpi); diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index fbf5e3974..d5707fb64 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1923,47 +1923,99 @@ int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, return best_cost; } -static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, - int range, int step, - const vp9_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, int lambda, - const int_mv *nb_full_mvs, - int full_mv_num) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - MV fcenter_mv = { center_mv->row, center_mv->col }; +static int64_t exhaustive_mesh_search_multi_step( + MV *best_mv, const MV *center_mv, int range, int step, + const struct buf_2d *src, const struct buf_2d *pre, int lambda, + const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits, + const vp9_variance_fn_ptr_t *fn_ptr) { int64_t best_sad; - int r, c, i; + int r, c; int start_col, end_col, start_row, end_row; - int col_step = (step > 1) ? 
step : 4; + *best_mv = *center_mv; + best_sad = + ((int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, center_mv), pre->stride) + << LOG2_PRECISION) + + lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); + start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); + start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); + end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); + end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); + for (r = start_row; r <= end_row; r += step) { + for (c = start_col; c <= end_col; c += step) { + const MV mv = { r, c }; + int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, &mv), pre->stride) + << LOG2_PRECISION; + if (sad < best_sad) { + sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } + } + return best_sad; +} - assert(step >= 1); +static int64_t exhaustive_mesh_search_single_step( + MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src, + const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs, + int full_mv_num, const MvLimits *mv_limits, + const vp9_variance_fn_ptr_t *fn_ptr) { + int64_t best_sad; + int r, c, i; + int start_col, end_col, start_row, end_row; - clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, - x->mv_limits.row_min, x->mv_limits.row_max); - *best_mv = fcenter_mv; + *best_mv = *center_mv; best_sad = - ((int64_t)fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &fcenter_mv), - in_what->stride) + ((int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, center_mv), pre->stride) << LOG2_PRECISION) + - lambda * vp9_nb_mvs_inconsistency(&fcenter_mv, nb_full_mvs, full_mv_num); - start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row); - start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col); - end_row = VPXMIN(range, x->mv_limits.row_max - 
fcenter_mv.row); - end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col); + lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); + start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); + start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); + end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); + end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); + for (r = start_row; r <= end_row; r += 1) { + c = start_col; + // sdx8f may not be available some block size + if (fn_ptr->sdx8f) { + while (c + 7 <= end_col) { + unsigned int sads[8]; + const MV mv = { r, c }; + const uint8_t *buf = get_buf_from_mv(pre, &mv); + fn_ptr->sdx8f(src->buf, src->stride, buf, pre->stride, sads); + + for (i = 0; i < 8; ++i) { + int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; + if (sad < best_sad) { + const MV mv = { r, c + i }; + sad += lambda * + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } + c += 8; + } + } + while (c + 3 <= end_col) { + unsigned int sads[4]; + const uint8_t *addrs[4]; + for (i = 0; i < 4; ++i) { + const MV mv = { r, c + i }; + addrs[i] = get_buf_from_mv(pre, &mv); + } + fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads); - for (r = start_row; r <= end_row; r += step) { - for (c = start_col; c <= end_col; c += col_step) { - // Step > 1 means we are not checking every location in this pass. 
- if (step > 1) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; - int64_t sad = - (int64_t)fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride) - << LOG2_PRECISION; + for (i = 0; i < 4; ++i) { + int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; if (sad < best_sad) { + const MV mv = { r, c + i }; sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); if (sad < best_sad) { @@ -1971,53 +2023,48 @@ static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, *best_mv = mv; } } - } else { - // 4 sads in a single call if we are checking every location - if (c + 3 <= end_col) { - unsigned int sads[4]; - const uint8_t *addrs[4]; - for (i = 0; i < 4; ++i) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; - addrs[i] = get_buf_from_mv(in_what, &mv); - } - fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); - - for (i = 0; i < 4; ++i) { - int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; - if (sad < best_sad) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; - sad += lambda * - vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - } - } else { - for (i = 0; i < end_col - c; ++i) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; - int64_t sad = (int64_t)fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), - in_what->stride) - << LOG2_PRECISION; - if (sad < best_sad) { - sad += lambda * - vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } - } - } + } + c += 4; + } + while (c <= end_col) { + const MV mv = { r, c }; + int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, + get_buf_from_mv(pre, &mv), pre->stride) + << LOG2_PRECISION; + if (sad < best_sad) { + sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); + if (sad < best_sad) { + best_sad = sad; + 
*best_mv = mv; } } + c += 1; } } - return best_sad; } +static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, + int range, int step, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *center_mv, int lambda, + const int_mv *nb_full_mvs, + int full_mv_num) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *src = &x->plane[0].src; + const struct buf_2d *pre = &xd->plane[0].pre[0]; + assert(step >= 1); + assert(is_mv_in(&x->mv_limits, center_mv)); + if (step == 1) { + return exhaustive_mesh_search_single_step( + best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num, + &x->mv_limits, fn_ptr); + } + return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src, + pre, lambda, nb_full_mvs, + full_mv_num, &x->mv_limits, fn_ptr); +} + static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *centre_mv_full, const vp9_variance_fn_ptr_t *fn_ptr, diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h index 6d0e1b8a6..bbf3e8f46 100644 --- a/vpx_dsp/variance.h +++ b/vpx_dsp/variance.h @@ -76,6 +76,7 @@ typedef struct vp9_variance_vtable { vpx_subpixvariance_fn_t svf; vpx_subp_avg_variance_fn_t svaf; vpx_sad_multi_d_fn_t sdx4df; + vpx_sad_multi_fn_t sdx8f; } vp9_variance_fn_ptr_t; #endif // CONFIG_VP9 |