diff options
author | Paul Wilkins <paulwilkins@google.com> | 2015-11-18 11:10:13 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-11-18 11:10:13 +0000 |
commit | 85aea16f176a83598586c72134df373069fe22ba (patch) | |
tree | f293b769b06ad3960c6d312d533a193daeac1e9c /vp9/encoder | |
parent | 8a782c7eacb4756a6bc8af78592107ce28b8565c (diff) | |
parent | 0149fb3d6b24f1df7f215ca12a2d8c8f70fd64f3 (diff) | |
download | libvpx-85aea16f176a83598586c72134df373069fe22ba.tar libvpx-85aea16f176a83598586c72134df373069fe22ba.tar.gz libvpx-85aea16f176a83598586c72134df373069fe22ba.tar.bz2 libvpx-85aea16f176a83598586c72134df373069fe22ba.zip |
Merge "Changes to exhaustive motion search."
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_block.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_ethread.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 214 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 40 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 50 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 19 |
9 files changed, 259 insertions, 83 deletions
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index fc3478626..ca5d7465f 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -71,6 +71,8 @@ struct macroblock { int rddiv; int rdmult; int mb_energy; + int * m_search_count_ptr; + int * ex_search_count_ptr; // These are set to their default values at the beginning, and then adjusted // further in the encoding process. diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 047588369..f9c28f6a9 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3839,6 +3839,10 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col]; int mi_row; + // Set up pointers to per thread motion search counters. + td->mb.m_search_count_ptr = &td->rd_counts.m_search_count; + td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count; + for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; mi_row += MI_BLOCK_SIZE) { if (cpi->sf.use_nonrd_pick_mode) @@ -3895,6 +3899,9 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_zero(rdc->coef_counts); vp9_zero(rdc->comp_pred_diff); vp9_zero(rdc->filter_diff); + rdc->m_search_count = 0; // Count of motion search hits. + rdc->ex_search_count = 0; // Exhaustive mesh search hits. + xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index eebd7c548..a57cf8725 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2995,7 +2995,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) - fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d" + fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d" "%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" " "%10"PRId64" %10"PRId64" %10d " "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" @@ -3004,6 +3004,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { "%10lf %8u %10"PRId64" %10d %10d %10d\n", cpi->common.current_video_frame, cm->width, cm->height, + cpi->td.rd_counts.m_search_count, + cpi->td.rd_counts.ex_search_count, cpi->rc.source_alt_ref_pending, cpi->rc.source_alt_ref_active, cpi->rc.this_frame_target, diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 975d9f42b..f6d8931fe 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -260,6 +260,8 @@ typedef struct RD_COUNTS { vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; int64_t comp_pred_diff[REFERENCE_MODES]; int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; + int m_search_count; + int ex_search_count; } RD_COUNTS; typedef struct ThreadData { diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c index ad25712be..1d1926cae 100644 --- a/vp9/encoder/vp9_ethread.c +++ b/vp9/encoder/vp9_ethread.c @@ -30,6 +30,10 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { for (n = 0; n < ENTROPY_TOKENS; n++) td->rd_counts.coef_counts[i][j][k][l][m][n] += td_t->rd_counts.coef_counts[i][j][k][l][m][n]; + + // Counts of all motion searches and exhuastive mesh searches. + td->rd_counts.m_search_count += td_t->rd_counts.m_search_count; + td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count; } static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index b9a104a60..327ac1985 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1517,69 +1517,83 @@ static int fast_dia_search(const MACROBLOCK *x, #undef CHECK_BETTER -int vp9_full_range_search_c(const MACROBLOCK *x, - const search_site_config *cfg, - MV *ref_mv, MV *best_mv, - int search_param, int sad_per_bit, int *num00, - const vp9_variance_fn_ptr_t *fn_ptr, - const MV *center_mv) { +// Exhuastive motion search around a given centre position with a given +// step size. +static int exhuastive_mesh_search(const MACROBLOCK *x, + MV *ref_mv, MV *best_mv, + int range, int step, int sad_per_bit, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const int range = 64; - const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; + MV fcenter_mv = {center_mv->row, center_mv->col}; unsigned int best_sad = INT_MAX; int r, c, i; int start_col, end_col, start_row, end_row; + int col_step = (step > 1) ? step : 4; - // The cfg and search_param parameters are not used in this search variant - (void)cfg; - (void)search_param; + assert(step >= 1); - clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - *best_mv = *ref_mv; - *num00 = 11; + clamp_mv(&fcenter_mv, x->mv_col_min, x->mv_col_max, + x->mv_row_min, x->mv_row_max); + *best_mv = fcenter_mv; best_sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, ref_mv), in_what->stride) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); - start_row = VPXMAX(-range, x->mv_row_min - ref_mv->row); - start_col = VPXMAX(-range, x->mv_col_min - ref_mv->col); - end_row = VPXMIN(range, x->mv_row_max - ref_mv->row); - end_col = VPXMIN(range, x->mv_col_max - ref_mv->col); - - for (r = start_row; r <= end_row; ++r) { - for (c = start_col; c <= end_col; c += 4) { - if (c + 3 <= end_col) { - unsigned int sads[4]; - const uint8_t *addrs[4]; - for (i = 0; i < 4; ++i) { - const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; - addrs[i] = get_buf_from_mv(in_what, &mv); - } - - fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); - - for (i = 0; i < 4; ++i) { - if (sads[i] < best_sad) { - const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; - const unsigned int sad = sads[i] + - mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; - } + get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) + + mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit); + start_row = VPXMAX(-range, x->mv_row_min - fcenter_mv.row); + start_col = VPXMAX(-range, x->mv_col_min - fcenter_mv.col); + end_row = VPXMIN(range, x->mv_row_max - fcenter_mv.row); + end_col = VPXMIN(range, x->mv_col_max - fcenter_mv.col); + + for (r = start_row; r <= end_row; r += step) { + for (c = start_col; c <= end_col; c += col_step) { + // Step > 1 means we are not checking every location in this pass. + if (step > 1) { + const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c}; + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride); + if (sad < best_sad) { + sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; } } } else { - for (i = 0; i < end_col - c; ++i) { - const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; - unsigned int sad = fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + // 4 sads in a single call if we are checking every location + if (c + 3 <= end_col) { + unsigned int sads[4]; + const uint8_t *addrs[4]; + for (i = 0; i < 4; ++i) { + const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i}; + addrs[i] = get_buf_from_mv(in_what, &mv); + } + fn_ptr->sdx4df(what->buf, what->stride, addrs, + in_what->stride, sads); + + for (i = 0; i < 4; ++i) { + if (sads[i] < best_sad) { + const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i}; + const unsigned int sad = sads[i] + + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } + } + } + } else { + for (i = 0; i < end_col - c; ++i) { + const MV mv = {fcenter_mv.row + r, fcenter_mv.col + c + i}; + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { - best_sad = sad; - *best_mv = mv; + sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; + } } } } @@ -2011,6 +2025,70 @@ static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, return bestsme; } +#define MIN_RANGE 7 +#define MAX_RANGE 256 +#define MIN_INTERVAL 1 +// Runs an limited range exhaustive mesh search using a pattern set +// according to the encode speed profile. +static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, + MV *centre_mv_full, int sadpb, int *cost_list, + const vp9_variance_fn_ptr_t *fn_ptr, + const MV *ref_mv, MV *dst_mv) { + const SPEED_FEATURES *const sf = &cpi->sf; + MV temp_mv = {centre_mv_full->row, centre_mv_full->col}; + MV f_ref_mv = {ref_mv->row >> 3, ref_mv->col >> 3}; + int bestsme; + int i; + int interval = sf->mesh_patterns[0].interval; + int range = sf->mesh_patterns[0].range; + int baseline_interval_divisor; + + // Keep track of number of exhaustive calls (this frame in this thread). + ++(*x->ex_search_count_ptr); + + // Trap illegal values for interval and range for this function. + if ((range < MIN_RANGE) || (range > MAX_RANGE) || + (interval < MIN_INTERVAL) || (interval > range)) + return INT_MAX; + + baseline_interval_divisor = range / interval; + + // Check size of proposed first range against magnitude of the centre + // value used as a starting point. + range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); + range = VPXMIN(range, MAX_RANGE); + interval = VPXMAX(interval, range / baseline_interval_divisor); + + // initial search + bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, + interval, sadpb, fn_ptr, &temp_mv); + + if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { + // Progressive searches with range and step size decreasing each time + // till we reach a step size of 1. Then break out. + for (i = 1; i < MAX_MESH_STEP; ++i) { + // First pass with coarser step and longer range + bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, + sf->mesh_patterns[i].range, + sf->mesh_patterns[i].interval, + sadpb, fn_ptr, &temp_mv); + + if (sf->mesh_patterns[i].interval == 1) + break; + } + } + + if (bestsme < INT_MAX) + bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); + *dst_mv = temp_mv; + + // Return cost list. + if (cost_list) { + calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); + } + return bestsme; +} + int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, @@ -2324,6 +2402,18 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, return best_sad; } +#define MIN_EX_SEARCH_LIMIT 128 +static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) { + const SPEED_FEATURES *const sf = &cpi->sf; + const int max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT, + (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100); + + return sf->allow_exhaustive_searches && + (sf->exhaustive_searches_thresh < INT_MAX) && + (*x->ex_search_count_ptr <= max_ex) && + !cpi->rc.is_src_frame_alt_ref; +} + int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int error_per_bit, @@ -2342,6 +2432,9 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, cost_list[4] = INT_MAX; } + // Keep track of number of searches (this frame in this thread). + ++(*x->m_search_count_ptr); + switch (method) { case FAST_DIAMOND: var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, @@ -2367,6 +2460,27 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, MAX_MVSEARCH_STEPS - 1 - step_param, 1, cost_list, fn_ptr, ref_mv, tmp_mv); + + // Should we allow a follow on exhaustive search? + if (is_exhaustive_allowed(cpi, x)) { + int64_t exhuastive_thr = sf->exhaustive_searches_thresh; + exhuastive_thr >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + + // Threshold variance for an exhaustive full search. + if (var > exhuastive_thr) { + int var_ex; + MV tmp_mv_ex; + var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, + error_per_bit, cost_list, fn_ptr, + ref_mv, &tmp_mv_ex); + + if (var_ex < var) { + var = var_ex; + *tmp_mv = tmp_mv_ex; + } + } + } break; default: assert(0 && "Invalid search method."); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index a40050158..ae1446417 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1750,8 +1750,9 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; + SPEED_FEATURES *const sf = &cpi->sf; const int has_second_rf = has_second_ref(mbmi); - const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; + const int inter_mode_mask = sf->inter_mode_mask[bsize]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; vp9_zero(*bsi); @@ -1820,7 +1821,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { MV *const new_mv = &mode_mv[NEWMV][0].as_mv; int step_param = 0; - int thissme, bestsme = INT_MAX; + int bestsme = INT_MAX; int sadpb = x->sadperbit4; MV mvp_full; int max_mv; @@ -1845,7 +1846,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, max_mv = VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3; - if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { + if (sf->mv.auto_mv_step_size && cm->show_frame) { // Take wtd average of the step_params based on the last frame's // max mv magnitude and the best ref mvs of the current block for // the given reference. @@ -1858,7 +1859,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.row = bsi->mvp.as_mv.row >> 3; mvp_full.col = bsi->mvp.as_mv.col >> 3; - if (cpi->sf.adaptive_motion_search) { + if (sf->adaptive_motion_search) { mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3; mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3; step_param = VPXMAX(step_param, 8); @@ -1871,31 +1872,10 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, bestsme = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, sadpb, - cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL, + sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL, &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1); - // Should we do a full search (best quality only) - if (cpi->oxcf.mode == BEST) { - int_mv *const best_mv = &mi->bmi[i].as_mv[0]; - /* Check if mvp_full is within the range. */ - clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, - x->mv_row_min, x->mv_row_max); - thissme = cpi->full_search_sad(x, &mvp_full, - sadpb, 16, &cpi->fn_ptr[bsize], - &bsi->ref_mv[0]->as_mv, - &best_mv->as_mv); - cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; - if (thissme < bestsme) { - bestsme = thissme; - *new_mv = best_mv->as_mv; - } else { - // The full search result is actually worse so re-instate the - // previous best vector - best_mv->as_mv = *new_mv; - } - } - if (bestsme < INT_MAX) { int distortion; cpi->find_fractional_mv_step( @@ -1904,8 +1884,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, + sf->mv.subpel_force_stop, + sf->mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &distortion, @@ -1916,7 +1896,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv; } - if (cpi->sf.adaptive_motion_search) + if (sf->adaptive_motion_search) x->pred_mv[mbmi->ref_frame[0]] = *new_mv; // restore src pointers @@ -1933,7 +1913,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->interp_filter == EIGHTTAP) { // adjust src pointers mi_buf_shift(x, i); - if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { + if (sf->comp_inter_joint_search_thresh <= bsize) { int rate_mv; joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row, mi_col, seg_mvs[i], diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index a53962984..b7daff3bc 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -15,6 +15,22 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vpx_dsp/vpx_dsp_common.h" +// Mesh search patters for various speed settings +static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = + {{64, 4}, {28, 2}, {15, 1}, {7, 1}, {1, 1}, {1, 1}}; + +#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method +static MESH_PATTERN good_quality_mesh_patterns[MAX_MESH_SPEED + 1] + [MAX_MESH_STEP] = + {{{64, 8}, {28, 4}, {15, 1}, {7, 1}, {3, 1}, {2, 1}}, + {{64, 8}, {28, 4}, {15, 1}, {7, 1}, {3, 1}, {2, 1}}, + {{64, 8}, {14, 2}, {7, 1}, {7, 1}, {3, 1}, {2, 1}}, + {{64, 16}, {24, 8}, {12, 4}, {7, 1}, {3, 1 }, {2, 1 }}, + {{64, 16}, {24, 8}, {12, 4}, {7, 1}, {3, 1 }, {2, 1 }}, + {{64, 16}, {24, 8}, {12, 4}, {7, 1}, {3, 1 }, {2, 1 }}, + }; +static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = + {50, 25, 15, 5, 1, 1}; // Intra only frames, golden frames (except alt ref overlays) and // alt ref frames tend to be coded at a higher than ambient quality @@ -259,6 +275,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->static_segmentation = 0; sf->adaptive_rd_thresh = 1; sf->use_fast_coef_costing = 1; + sf->allow_exhaustive_searches = 0; + sf->exhaustive_searches_thresh = INT_MAX; if (speed >= 1) { sf->use_square_partition_only = !frame_is_intra_only(cm); @@ -527,8 +545,36 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { set_good_speed_feature(cpi, cm, sf, oxcf->speed); cpi->full_search_sad = vp9_full_search_sad; - cpi->diamond_search_sad = oxcf->mode == BEST ? vp9_full_range_search - : vp9_diamond_search_sad; + cpi->diamond_search_sad = vp9_diamond_search_sad; + + sf->allow_exhaustive_searches = 1; + if (oxcf->mode == BEST) { + if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) + sf->exhaustive_searches_thresh = (1 << 20); + else + sf->exhaustive_searches_thresh = (1 << 21); + sf->max_exaustive_pct = 100; + for (i = 0; i < MAX_MESH_STEP; ++i) { + sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range; + sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval; + } + } else { + int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed; + if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) + sf->exhaustive_searches_thresh = (1 << 22); + else + sf->exhaustive_searches_thresh = (1 << 23); + sf->max_exaustive_pct = good_quality_max_mesh_pct[speed]; + if (speed > 0) + sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1; + + for (i = 0; i < MAX_MESH_STEP; ++i) { + sf->mesh_patterns[i].range = + good_quality_mesh_patterns[speed][i].range; + sf->mesh_patterns[i].interval = + good_quality_mesh_patterns[speed][i].interval; + } + } // Slow quant, dct and trellis not worthwhile for first pass // so make sure they are always turned off. diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 575e98cf5..e674a4f91 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -195,6 +195,13 @@ typedef struct MV_SPEED_FEATURES { int fullpel_search_step_param; } MV_SPEED_FEATURES; +#define MAX_MESH_STEP 6 + +typedef struct MESH_PATTERN { + int range; + int interval; +} MESH_PATTERN; + typedef struct SPEED_FEATURES { MV_SPEED_FEATURES mv; @@ -299,6 +306,18 @@ typedef struct SPEED_FEATURES { // point for this motion search and limits the search range around it. int adaptive_motion_search; + // Flag for allowing some use of exhaustive searches; + int allow_exhaustive_searches; + + // Threshold for allowing exhaistive motion search. + int exhaustive_searches_thresh; + + // Maximum number of exhaustive searches for a frame. + int max_exaustive_pct; + + // Pattern to be used for any exhaustive mesh searches. + MESH_PATTERN mesh_patterns[MAX_MESH_STEP]; + int schedule_mode_search; // Allows sub 8x8 modes to use the prediction filter that was determined |