diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_bitstream.c | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_block.h | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 90 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 36 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 13 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 116 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 58 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 95 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 159 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.h | 45 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.c | 82 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct32x32_sse2.c | 3 |
15 files changed, 275 insertions, 444 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index c5a85c9df..35d2ecf96 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -1006,9 +1006,10 @@ static void write_frame_size_with_refs(VP9_COMP *cpi, found = cm->width == cfg->y_crop_width && cm->height == cfg->y_crop_height; - // TODO(ivan): This prevents a bug while more than 3 buffers are used. Do it - // in a better way. - if (cpi->use_svc) { + // Set "found" to 0 for temporal svc and for spatial svc key frame + if (cpi->use_svc && + (cpi->svc.number_spatial_layers == 1 || + cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame)) { found = 0; } vp9_wb_write_bit(wb, found); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index f35a85fba..2ccf4f80e 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -129,12 +129,6 @@ struct macroblock { int *nmvsadcost_hp[2]; int **mvsadcost; - int mbmode_cost[INTRA_MODES]; - unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; - int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES]; - int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; - int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; - // These define limits to motion vector components to prevent them // from extending outside the UMV borders int mv_col_min; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 19aa592f2..87051d56b 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -70,6 +70,18 @@ static const uint8_t VP9_VAR_OFFS[64] = { 128, 128, 128, 128, 128, 128, 128, 128 }; +static void get_sse_sum_8x8(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum) { + variance(src, src_stride, ref, ref_stride, 8, 8, sse, sum); +} + +static void get_sse_sum_16x16(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse, int *sum) { + variance(src, src_stride, ref, ref_stride, 16, 16, sse, sum); +} + static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs) { @@ -475,8 +487,8 @@ static void choose_partitioning(VP9_COMP *cpi, unsigned int sse = 0; int sum = 0; if (x_idx < pixels_wide && y_idx < pixels_high) - vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, - d + y_idx * dp + x_idx, dp, &sse, &sum); + get_sse_sum_8x8(s + y_idx * sp + x_idx, sp, + d + y_idx * dp + x_idx, dp, &sse, &sum); fill_variance(sse, sum, 64, &vst->split[k].part_variances.none); } } @@ -1211,10 +1223,9 @@ static void set_source_var_based_partition(VP9_COMP *cpi, int b_offset = b_mi_row * MI_SIZE * src_stride + b_mi_col * MI_SIZE; - vp9_get_sse_sum_16x16(src + b_offset, - src_stride, - pre_src + b_offset, - pre_stride, &d16[j].sse, &d16[j].sum); + get_sse_sum_16x16(src + b_offset, src_stride, + pre_src + b_offset, pre_stride, + &d16[j].sse, &d16[j].sum); d16[j].var = d16[j].sse - (((uint32_t)d16[j].sum * d16[j].sum) >> 8); @@ -3052,6 +3063,23 @@ static void encode_frame_internal(VP9_COMP *cpi) { #endif } +static INTERP_FILTER get_interp_filter( + const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) { + if (!is_alt_ref && + threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] && + threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] && + threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) { + return EIGHTTAP_SMOOTH; + } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] && + threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) { + return EIGHTTAP_SHARP; + } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) { + return EIGHTTAP; + } else { + return SWITCHABLE; + } +} + void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RD_OPT *const rd_opt = &cpi->rd; @@ -3087,59 +3115,41 @@ void vp9_encode_frame(VP9_COMP *cpi) { // that for subsequent frames. // It does the same analysis for transform size selection also. const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); - const int64_t *mode_thresh = rd_opt->prediction_type_threshes[frame_type]; - const int64_t *filter_thresh = rd_opt->filter_threshes[frame_type]; + int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type]; + int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type]; + int *const tx_thrs = rd_opt->tx_select_threshes[frame_type]; + const int is_alt_ref = frame_type == ALTREF_FRAME; /* prediction (compound, single or hybrid) mode selection */ - if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter) + if (is_alt_ref || !cm->allow_comp_inter_inter) cm->reference_mode = SINGLE_REFERENCE; - else if (mode_thresh[COMPOUND_REFERENCE] > mode_thresh[SINGLE_REFERENCE] && - mode_thresh[COMPOUND_REFERENCE] > - mode_thresh[REFERENCE_MODE_SELECT] && + else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] && + mode_thrs[COMPOUND_REFERENCE] > + mode_thrs[REFERENCE_MODE_SELECT] && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) cm->reference_mode = COMPOUND_REFERENCE; - else if (mode_thresh[SINGLE_REFERENCE] > mode_thresh[REFERENCE_MODE_SELECT]) + else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT]) cm->reference_mode = SINGLE_REFERENCE; else cm->reference_mode = REFERENCE_MODE_SELECT; - if (cm->interp_filter == SWITCHABLE) { - if (frame_type != ALTREF_FRAME && - filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP] && - filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[EIGHTTAP_SHARP] && - filter_thresh[EIGHTTAP_SMOOTH] > filter_thresh[SWITCHABLE - 1]) { - cm->interp_filter = EIGHTTAP_SMOOTH; - } else if (filter_thresh[EIGHTTAP_SHARP] > filter_thresh[EIGHTTAP] && - filter_thresh[EIGHTTAP_SHARP] > filter_thresh[SWITCHABLE - 1]) { - cm->interp_filter = EIGHTTAP_SHARP; - } else if (filter_thresh[EIGHTTAP] > filter_thresh[SWITCHABLE - 1]) { - cm->interp_filter = EIGHTTAP; - } - } + if (cm->interp_filter == SWITCHABLE) + cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref); encode_frame_internal(cpi); - for (i = 0; i < REFERENCE_MODES; ++i) { - const int diff = (int) (rd_opt->comp_pred_diff[i] / cm->MBs); - rd_opt->prediction_type_threshes[frame_type][i] += diff; - rd_opt->prediction_type_threshes[frame_type][i] >>= 1; - } + for (i = 0; i < REFERENCE_MODES; ++i) + mode_thrs[i] = (mode_thrs[i] + rd_opt->comp_pred_diff[i] / cm->MBs) / 2; - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { - const int64_t diff = rd_opt->filter_diff[i] / cm->MBs; - rd_opt->filter_threshes[frame_type][i] = - (rd_opt->filter_threshes[frame_type][i] + diff) / 2; - } + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + filter_thrs[i] = (filter_thrs[i] + rd_opt->filter_diff[i] / cm->MBs) / 2; for (i = 0; i < TX_MODES; ++i) { int64_t pd = rd_opt->tx_select_diff[i]; - int diff; if (i == TX_MODE_SELECT) pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZES - 1), 0); - diff = (int) (pd / cm->MBs); - rd_opt->tx_select_threshes[frame_type][i] += diff; - rd_opt->tx_select_threshes[frame_type][i] /= 2; + tx_thrs[i] = (tx_thrs[i] + (int)(pd / cm->MBs)) / 2; } if (cm->reference_mode == REFERENCE_MODE_SELECT) { diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index f3ab4ede3..cc2c552a7 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1050,10 +1050,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cm->error.setjmp = 0; -#ifdef MODE_TEST_HIT_STATS - vp9_zero(cpi->mode_test_hits); -#endif - return cpi; } @@ -1112,34 +1108,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #endif -#ifdef MODE_TEST_HIT_STATS - if (cpi->pass != 1) { - double norm_per_pixel_mode_tests = 0; - double norm_counts[BLOCK_SIZES]; - int i; - int sb64_per_frame; - int norm_factors[BLOCK_SIZES] = - {256, 128, 128, 64, 32, 32, 16, 8, 8, 4, 2, 2, 1}; - FILE *f = fopen("mode_hit_stats.stt", "a"); - - // On average, how many mode tests do we do - for (i = 0; i < BLOCK_SIZES; ++i) { - norm_counts[i] = (double)cpi->mode_test_hits[i] / - (double)norm_factors[i]; - norm_per_pixel_mode_tests += norm_counts[i]; - } - // Convert to a number per 64x64 and per frame - sb64_per_frame = ((cpi->common.height + 63) / 64) * - ((cpi->common.width + 63) / 64); - norm_per_pixel_mode_tests = - norm_per_pixel_mode_tests / - (double)(cpi->common.current_video_frame * sb64_per_frame); - - fprintf(f, "%6.4f\n", norm_per_pixel_mode_tests); - fclose(f); - } -#endif - #if 0 { printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000); @@ -2093,7 +2061,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->lf.mode_ref_delta_update = 0; // Initialize cpi->mv_step_param to default based on max resolution. - cpi->mv_step_param = vp9_init_search_range(cpi, max_mv_def); + cpi->mv_step_param = vp9_init_search_range(sf, max_mv_def); // Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate. if (sf->auto_mv_step_size) { if (frame_is_intra_only(cm)) { @@ -2105,7 +2073,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Allow mv_steps to correspond to twice the max mv magnitude found // in the previous frame, capped by the default max_mv_magnitude based // on resolution. - cpi->mv_step_param = vp9_init_search_range(cpi, MIN(max_mv_def, 2 * + cpi->mv_step_param = vp9_init_search_range(sf, MIN(max_mv_def, 2 * cpi->max_mv_magnitude)); cpi->max_mv_magnitude = 0; } diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 364ea3a9e..f48909e47 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -40,8 +40,6 @@ extern "C" { #endif -// #define MODE_TEST_HIT_STATS - #define DEFAULT_GF_INTERVAL 10 #define MAX_MODES 30 @@ -499,6 +497,12 @@ typedef struct VP9_COMP { search_site_config ss_cfg; + int mbmode_cost[INTRA_MODES]; + unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; + int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES]; + int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; + int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; + #if CONFIG_MULTIPLE_ARF // ARF tracking variables. int multi_arf_enabled; @@ -511,11 +515,6 @@ typedef struct VP9_COMP { int this_frame_weight; int max_arf_level; #endif - -#ifdef MODE_TEST_HIT_STATS - // Debug / test stats - int64_t mode_test_hits[BLOCK_SIZES]; -#endif } VP9_COMP; void vp9_initialize_enc(); diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index bbec4da76..43c8ab868 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -51,7 +51,7 @@ void vp9_set_mv_search_range(MACROBLOCK *x, const MV *mv) { x->mv_row_max = row_max; } -int vp9_init_search_range(VP9_COMP *cpi, int size) { +int vp9_init_search_range(const SPEED_FEATURES *sf, int size) { int sr = 0; // Minimum search size no matter what the passed in value. @@ -60,8 +60,8 @@ int vp9_init_search_range(VP9_COMP *cpi, int size) { while ((size << sr) < MAX_FULL_PEL_VAL) sr++; - sr += cpi->sf.reduce_first_step_size; - sr = MIN(sr, (cpi->sf.max_step_search_steps - 2)); + sr += sf->reduce_first_step_size; + sr = MIN(sr, (sf->max_step_search_steps - 2)); return sr; } @@ -878,89 +878,67 @@ int vp9_full_range_search_c(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; - const uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what; - const int in_what_stride = xd->plane[0].pre[0].stride; - - unsigned int bestsad = INT_MAX; - int ref_row, ref_col; - - unsigned int thissad; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const int range = 64; const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3}; - int tr, tc; - int best_tr = 0; - int best_tc = 0; - int range = 64; - - int start_col, end_col; - int start_row, end_row; - int i; + unsigned int best_sad = INT_MAX; + int r, c, i; + int start_col, end_col, start_row, end_row; clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - ref_row = ref_mv->row; - ref_col = ref_mv->col; + *best_mv = *ref_mv; *num00 = 11; - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Work out the start point for the search - in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; - - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); - - start_row = MAX(-range, x->mv_row_min - ref_row); - start_col = MAX(-range, x->mv_col_min - ref_col); - end_row = MIN(range, x->mv_row_max - ref_row); - end_col = MIN(range, x->mv_col_max - ref_col); - - for (tr = start_row; tr <= end_row; ++tr) { - for (tc = start_col; tc <= end_col; tc += 4) { - if ((tc + 3) <= end_col) { - unsigned int sad_array[4]; - unsigned char const *addr_ref[4]; - for (i = 0; i < 4; ++i) - addr_ref[i] = in_what + tr * in_what_stride + tc + i; + best_sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, ref_mv), in_what->stride, + 0x7fffffff) + + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); + start_row = MAX(-range, x->mv_row_min - ref_mv->row); + start_col = MAX(-range, x->mv_col_min - ref_mv->col); + end_row = MIN(range, x->mv_row_max - ref_mv->row); + end_col = MIN(range, x->mv_col_max - ref_mv->col); + + for (r = start_row; r <= end_row; ++r) { + for (c = start_col; c <= end_col; c += 4) { + if (c + 3 <= end_col) { + unsigned int sads[4]; + const uint8_t *addrs[4]; + for (i = 0; i < 4; ++i) { + const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; + addrs[i] = get_buf_from_mv(in_what, &mv); + } - fn_ptr->sdx4df(what, what_stride, addr_ref, in_what_stride, sad_array); + fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); for (i = 0; i < 4; ++i) { - if (sad_array[i] < bestsad) { - const MV this_mv = {ref_row + tr, ref_col + tc + i}; - thissad = sad_array[i] + - mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_tr = tr; - best_tc = tc + i; + if (sads[i] < best_sad) { + const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; + const unsigned int sad = sads[i] + + mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; } } } } else { - for (i = 0; i < end_col - tc; ++i) { - const uint8_t *check_here = in_what + tr * in_what_stride + tc + i; - thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, - bestsad); - - if (thissad < bestsad) { - const MV this_mv = {ref_row + tr, ref_col + tc + i}; - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - - if (thissad < bestsad) { - bestsad = thissad; - best_tr = tr; - best_tc = tc + i; + for (i = 0; i < end_col - c; ++i) { + const MV mv = {ref_mv->row + r, ref_mv->col + c + i}; + unsigned int sad = fn_ptr->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, best_sad); + if (sad < best_sad) { + sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); + if (sad < best_sad) { + best_sad = sad; + *best_mv = mv; } } } } } } - best_mv->row += best_tr; - best_mv->col += best_tc; - return bestsad; + + return best_sad; } int vp9_diamond_search_sad_c(const MACROBLOCK *x, diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 1f524f1f6..827957d62 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -62,7 +62,9 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x, int use_mvcost); struct VP9_COMP; -int vp9_init_search_range(struct VP9_COMP *cpi, int size); +struct SPEED_FEATURES; + +int vp9_init_search_range(const struct SPEED_FEATURES *sf, int size); // Runs sequence of diamond searches in smaller steps for RD int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x, diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 7d2cab42b..ad77cd0fe 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -85,39 +85,9 @@ static void full_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.col >>= 3; mvp_full.row >>= 3; - if (cpi->sf.search_method == FAST_DIAMOND) { - // NOTE: this returns SAD - vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - } else if (cpi->sf.search_method == FAST_HEX) { - // NOTE: this returns SAD - vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - } else if (cpi->sf.search_method == HEX) { - // NOTE: this returns SAD - vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - } else if (cpi->sf.search_method == SQUARE) { - // NOTE: this returns SAD - vp9_square_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - } else if (cpi->sf.search_method == BIGDIA) { - // NOTE: this returns SAD - vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - } else { - int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - // NOTE: this returns variance - vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, - sadpb, further_steps, 1, - &cpi->fn_ptr[bsize], - &ref_mv, &tmp_mv->as_mv); - } + full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, &ref_mv, + &tmp_mv->as_mv, INT_MAX, 0); + x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; x->mv_row_min = tmp_row_min; @@ -326,8 +296,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (frame_mv[NEWMV][ref_frame].as_int == INVALID_MV) continue; - rate_mode = x->inter_mode_cost[mbmi->mode_context[ref_frame]] - [INTER_OFFSET(this_mode)]; + rate_mode = cpi->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; if (RDCOST(x->rdmult, x->rddiv, rate_mv + rate_mode, 0) > best_rd) continue; @@ -361,24 +331,24 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP], &pf_dist[EIGHTTAP]); tmp_rdcost1 = RDCOST(x->rdmult, x->rddiv, - vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP], + vp9_get_switchable_rate(cpi) + pf_rate[EIGHTTAP], pf_dist[EIGHTTAP]); mbmi->interp_filter = EIGHTTAP_SHARP; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SHARP], &pf_dist[EIGHTTAP_SHARP]); - tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, - vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SHARP], - pf_dist[EIGHTTAP_SHARP]); + tmp_rdcost2 = RDCOST(x->rdmult, x->rddiv, vp9_get_switchable_rate(cpi) + + pf_rate[EIGHTTAP_SHARP], + pf_dist[EIGHTTAP_SHARP]); mbmi->interp_filter = EIGHTTAP_SMOOTH; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[EIGHTTAP_SMOOTH], &pf_dist[EIGHTTAP_SMOOTH]); - tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, - vp9_get_switchable_rate(x) + pf_rate[EIGHTTAP_SMOOTH], - pf_dist[EIGHTTAP_SMOOTH]); + tmp_rdcost3 = RDCOST(x->rdmult, x->rddiv, vp9_get_switchable_rate(cpi) + + pf_rate[EIGHTTAP_SMOOTH], + pf_dist[EIGHTTAP_SMOOTH]); if (tmp_rdcost2 < tmp_rdcost1) { if (tmp_rdcost2 < tmp_rdcost3) @@ -401,7 +371,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } rate += rate_mv; - rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]] + rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]] [INTER_OFFSET(this_mode)]; this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); @@ -432,7 +402,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &pd->dst.buf[0], pd->dst.stride, 0, 0, 0); model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); - rate += x->mbmode_cost[this_mode]; + rate += cpi->mbmode_cost[this_mode]; rate += intra_cost_penalty; this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 24e75aedb..fe43f3a07 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -45,11 +45,9 @@ // Tables relating active max Q to active min Q static int kf_low_motion_minq[QINDEX_RANGE]; static int kf_high_motion_minq[QINDEX_RANGE]; -static int gf_low_motion_minq[QINDEX_RANGE]; -static int gf_high_motion_minq[QINDEX_RANGE]; +static int arfgf_low_motion_minq[QINDEX_RANGE]; +static int arfgf_high_motion_minq[QINDEX_RANGE]; static int inter_minq[QINDEX_RANGE]; -static int afq_low_motion_minq[QINDEX_RANGE]; -static int afq_high_motion_minq[QINDEX_RANGE]; static int gf_high = 2000; static int gf_low = 400; static int kf_high = 5000; @@ -81,13 +79,11 @@ void vp9_rc_init_minq_luts() { for (i = 0; i < QINDEX_RANGE; i++) { const double maxq = vp9_convert_qindex_to_q(i); - kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15); + kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.125); kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50); - gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32); - gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); - afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33); - afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55); - inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75); + arfgf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30); + arfgf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); + inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90); } } @@ -548,7 +544,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, } active_best_quality = get_active_quality( q, rc->gfu_boost, gf_low, gf_high, - gf_low_motion_minq, gf_high_motion_minq); + arfgf_low_motion_minq, arfgf_high_motion_minq); } else { // Use the lower of active_worst_quality and recent/average Q. if (cm->current_video_frame > 1) { @@ -676,17 +672,12 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) { if (q < cq_level) q = cq_level; - if (rc->frames_since_key > 1) { - active_best_quality = get_active_quality(q, rc->gfu_boost, - gf_low, gf_high, - afq_low_motion_minq, - afq_high_motion_minq); - } else { - active_best_quality = get_active_quality(q, rc->gfu_boost, - gf_low, gf_high, - gf_low_motion_minq, - gf_high_motion_minq); - } + + active_best_quality = get_active_quality(q, rc->gfu_boost, + gf_low, gf_high, + arfgf_low_motion_minq, + arfgf_high_motion_minq); + // Constrained quality use slightly lower active best. active_best_quality = active_best_quality * 15 / 16; @@ -694,20 +685,14 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, if (!cpi->refresh_alt_ref_frame) { active_best_quality = cq_level; } else { - if (rc->frames_since_key > 1) { - active_best_quality = get_active_quality( - q, rc->gfu_boost, gf_low, gf_high, - afq_low_motion_minq, afq_high_motion_minq); - } else { - active_best_quality = get_active_quality( - q, rc->gfu_boost, gf_low, gf_high, - gf_low_motion_minq, gf_high_motion_minq); - } + active_best_quality = get_active_quality( + q, rc->gfu_boost, gf_low, gf_high, + arfgf_low_motion_minq, arfgf_high_motion_minq); } } else { active_best_quality = get_active_quality( q, rc->gfu_boost, gf_low, gf_high, - gf_low_motion_minq, gf_high_motion_minq); + arfgf_low_motion_minq, arfgf_high_motion_minq); } } else { if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) { @@ -867,17 +852,12 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) { if (q < cq_level) q = cq_level; - if (rc->frames_since_key > 1) { - active_best_quality = get_active_quality(q, rc->gfu_boost, - gf_low, gf_high, - afq_low_motion_minq, - afq_high_motion_minq); - } else { - active_best_quality = get_active_quality(q, rc->gfu_boost, - gf_low, gf_high, - gf_low_motion_minq, - gf_high_motion_minq); - } + + active_best_quality = get_active_quality(q, rc->gfu_boost, + gf_low, gf_high, + arfgf_low_motion_minq, + arfgf_high_motion_minq); + // Constrained quality use slightly lower active best. active_best_quality = active_best_quality * 15 / 16; @@ -885,20 +865,14 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, if (!cpi->refresh_alt_ref_frame) { active_best_quality = cq_level; } else { - if (rc->frames_since_key > 1) { - active_best_quality = get_active_quality( - q, rc->gfu_boost, gf_low, gf_high, - afq_low_motion_minq, afq_high_motion_minq); - } else { - active_best_quality = get_active_quality( - q, rc->gfu_boost, gf_low, gf_high, - gf_low_motion_minq, gf_high_motion_minq); - } + active_best_quality = get_active_quality( + q, rc->gfu_boost, gf_low, gf_high, + arfgf_low_motion_minq, arfgf_high_motion_minq); } } else { active_best_quality = get_active_quality( q, rc->gfu_boost, gf_low, gf_high, - gf_low_motion_minq, gf_high_motion_minq); + arfgf_low_motion_minq, arfgf_high_motion_minq); } } else { if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) { @@ -1308,11 +1282,26 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { cpi->oxcf.key_freq == 0))) { cm->frame_type = KEY_FRAME; rc->source_alt_ref_active = 0; + + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1; + } + if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) { target = calc_iframe_target_size_one_pass_cbr(cpi); } } else { cm->frame_type = INTER_FRAME; + + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + if (cpi->svc.spatial_layer_id == 0) { + lc->is_key_frame = 0; + } else { + lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; + } + } + if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) { target = calc_pframe_target_size_one_pass_cbr(cpi); } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index f854356b9..f4def1eef 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -152,24 +152,23 @@ static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize, } static void fill_mode_costs(VP9_COMP *cpi) { - MACROBLOCK *const x = &cpi->mb; const FRAME_CONTEXT *const fc = &cpi->common.fc; int i, j; for (i = 0; i < INTRA_MODES; i++) for (j = 0; j < INTRA_MODES; j++) - vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], + vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], vp9_intra_mode_tree); // TODO(rbultje) separate tables for superblock costing? - vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); - vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME], + vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); + vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME], vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree); - vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME], + vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME], fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - vp9_cost_tokens((int *)x->switchable_interp_costs[i], + vp9_cost_tokens(cpi->switchable_interp_costs[i], fc->switchable_interp_prob[i], vp9_switchable_interp_tree); } @@ -313,7 +312,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { &cm->fc.nmvc, cm->allow_high_precision_mv); for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - vp9_cost_tokens((int *)x->inter_mode_cost[i], + vp9_cost_tokens((int *)cpi->inter_mode_cost[i], cm->fc.inter_mode_probs[i], vp9_inter_mode_tree); } } @@ -1187,7 +1186,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT t_above[4], t_left[4]; - const int *bmode_costs = mb->mbmode_cost; + const int *bmode_costs = cpi->mbmode_cost; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); @@ -1203,7 +1202,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i); const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i); - bmode_costs = mb->y_mode_costs[A][L]; + bmode_costs = cpi->y_mode_costs[A][L]; } this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, @@ -1250,7 +1249,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; int i; - int *bmode_costs = x->mbmode_cost; + int *bmode_costs = cpi->mbmode_cost; if (cpi->sf.tx_size_search_method == USE_FULL_RD) for (i = 0; i < TX_MODES; i++) @@ -1269,7 +1268,7 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); - bmode_costs = x->y_mode_costs[A][L]; + bmode_costs = cpi->y_mode_costs[A][L]; } mic->mbmi.mode = mode; @@ -1378,7 +1377,7 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, if (this_rate_tokenonly == INT_MAX) continue; this_rate = this_rate_tokenonly + - x->intra_uv_mode_cost[cpi->common.frame_type][mode]; + cpi->intra_uv_mode_cost[cpi->common.frame_type][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { @@ -1426,7 +1425,7 @@ static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED; super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, bsize, INT64_MAX); - *rate = *rate_tokenonly + x->intra_uv_mode_cost[cm->frame_type][DC_PRED]; + *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED]; return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } @@ -1460,7 +1459,7 @@ static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode, // Don't account for mode here if segment skip is enabled. if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) { assert(is_inter_mode(mode)); - return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; + return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; } else { return 0; } @@ -1736,7 +1735,6 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize = mbmi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; - vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize]; ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; const int has_second_rf = has_second_ref(mbmi); @@ -1808,7 +1806,6 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) { MV *const new_mv = &mode_mv[NEWMV][0].as_mv; int step_param = 0; - int further_steps; int thissme, bestsme = INT_MAX; int sadpb = x->sadperbit4; MV mvp_full; @@ -1836,8 +1833,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, // Take wtd average of the step_params based on the last frame's // max mv magnitude and the best ref mvs of the current block for // the given reference. - step_param = (vp9_init_search_range(cpi, max_mv) + - cpi->mv_step_param) >> 1; + step_param = (vp9_init_search_range(&cpi->sf, max_mv) + + cpi->mv_step_param) / 2; } else { step_param = cpi->mv_step_param; } @@ -1851,48 +1848,14 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, step_param = MAX(step_param, 8); } - further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; // adjust src pointer for this block mi_buf_shift(x, i); vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv); - if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full, - step_param, - sadpb, 1, v_fn_ptr, 1, - &bsi->ref_mv[0]->as_mv, - new_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, new_mv, - &bsi->ref_mv[0]->as_mv, - v_fn_ptr, 1); - } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full, - step_param, - sadpb, 1, v_fn_ptr, 1, - &bsi->ref_mv[0]->as_mv, - new_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, new_mv, - &bsi->ref_mv[0]->as_mv, - v_fn_ptr, 1); - } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full, - step_param, - sadpb, 1, v_fn_ptr, 1, - &bsi->ref_mv[0]->as_mv, - new_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, new_mv, - &bsi->ref_mv[0]->as_mv, - v_fn_ptr, 1); - } else { - bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, - sadpb, further_steps, 0, v_fn_ptr, - &bsi->ref_mv[0]->as_mv, - new_mv); - } + bestsme = full_pixel_search(cpi, x, bsize, &mvp_full, step_param, + sadpb, &bsi->ref_mv[0]->as_mv, new_mv, + INT_MAX, 1); // Should we do a full search (best quality only) if (is_best_mode(cpi->oxcf.mode)) { @@ -1901,7 +1864,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); thissme = cpi->full_search_sad(x, &mvp_full, - sadpb, 16, v_fn_ptr, + sadpb, 16, &cpi->fn_ptr[bsize], &bsi->ref_mv[0]->as_mv, &best_mv->as_mv); if (thissme < bestsme) { @@ -1920,7 +1883,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, - x->errorperbit, v_fn_ptr, + x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.subpel_force_stop, cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, @@ -2334,12 +2297,12 @@ const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; } -int vp9_get_switchable_rate(const MACROBLOCK *x) { - const MACROBLOCKD *const xd = &x->e_mbd; +int vp9_get_switchable_rate(const VP9_COMP *cpi) { + const MACROBLOCKD *const xd = &cpi->mb.e_mbd; const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const int ctx = vp9_get_pred_context_switchable_interp(xd); return SWITCHABLE_INTERP_RATE_FACTOR * - x->switchable_interp_costs[ctx][mbmi->interp_filter]; + cpi->switchable_interp_costs[ctx][mbmi->interp_filter]; } static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, @@ -2351,7 +2314,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int bestsme = INT_MAX; - int further_steps, step_param; + int step_param; int sadpb = x->sadperbit16; MV mvp_full; int ref = mbmi->ref_frame[0]; @@ -2389,8 +2352,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // Take wtd average of the step_params based on the last frame's // max mv magnitude and that based on the best ref mvs of the current // block for the given reference. - step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) + - cpi->mv_step_param) >> 1; + step_param = (vp9_init_search_range(&cpi->sf, x->max_mv_context[ref]) + + cpi->mv_step_param) / 2; } else { step_param = cpi->mv_step_param; } @@ -2431,50 +2394,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.col >>= 3; mvp_full.row >>= 3; - // Further step/diamond searches as necessary - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - - if (cpi->sf.search_method == FAST_DIAMOND) { - bestsme = vp9_fast_dia_search(x, &mvp_full, step_param, sadpb, 0, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == FAST_HEX) { - bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1, - &cpi->fn_ptr[bsize], 1, - &ref_mv, &tmp_mv->as_mv); - if (bestsme < INT_MAX) - bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv, - &cpi->fn_ptr[bsize], 1); - } else { - bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, - sadpb, further_steps, 1, - &cpi->fn_ptr[bsize], - &ref_mv, &tmp_mv->as_mv); - } + bestsme = full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, + &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -2788,7 +2709,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int j; int64_t rs_rd; mbmi->interp_filter = i; - rs = vp9_get_switchable_rate(x); + rs = vp9_get_switchable_rate(cpi); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { @@ -2858,7 +2779,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : *best_filter; - rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(x) : 0; + rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0; if (pred_exists) { if (best_needs_copy) { @@ -2888,7 +2809,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) - *rate2 += vp9_get_switchable_rate(x); + *rate2 += vp9_get_switchable_rate(cpi); if (!is_comp_pred) { if (!x->in_active_map) { @@ -3409,12 +3330,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < TX_MODES; ++i) tx_cache[i] = INT64_MAX; -#ifdef MODE_TEST_HIT_STATS - // TEST/DEBUG CODE - // Keep a rcord of the number of test hits at each size - cpi->mode_test_hits[bsize]++; -#endif - if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, @@ -3435,7 +3350,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, skippable = skippable && skip_uv[uv_tx]; mbmi->uv_mode = mode_uv[uv_tx]; - rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; + rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx]; if (this_mode != DC_PRED && this_mode != TM_PRED) rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; @@ -3953,12 +3868,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } -#ifdef MODE_TEST_HIT_STATS - // TEST/DEBUG CODE - // Keep a rcord of the number of test hits at each size - cpi->mode_test_hits[bsize]++; -#endif - if (ref_frame == INTRA_FRAME) { int rate; if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, @@ -4034,7 +3943,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, if (tmp_rd == INT64_MAX) continue; - rs = vp9_get_switchable_rate(x); + rs = vp9_get_switchable_rate(cpi); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); rd_opt->filter_cache[switchable_filter_index] = tmp_rd; rd_opt->filter_cache[SWITCHABLE_FILTERS] = @@ -4112,7 +4021,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, distortion2 += distortion; if (cm->interp_filter == SWITCHABLE) - rate2 += vp9_get_switchable_rate(x); + rate2 += vp9_get_switchable_rate(cpi); if (!mode_excluded) mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index cd622d608..b6b51e553 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -40,7 +40,7 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, unsigned int qstep, int *rate, int64_t *dist); -int vp9_get_switchable_rate(const MACROBLOCK *x); +int vp9_get_switchable_rate(const VP9_COMP *cpi); void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, @@ -87,6 +87,49 @@ void vp9_set_rd_speed_thresholds(VP9_COMP *cpi); void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi); +static INLINE int full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, MV *mvp_full, + int step_param, int error_per_bit, + const MV *ref_mv, MV *tmp_mv, + int var_max, int rd) { + int var = 0; + + if (cpi->sf.search_method == FAST_DIAMOND) { + var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, + &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); + if (rd && var < var_max) + var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); + } else if (cpi->sf.search_method == FAST_HEX) { + var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, + &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); + if (rd && var < var_max) + var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); + } else if (cpi->sf.search_method == HEX) { + var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, + &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); + if (rd && var < var_max) + var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); + } else if (cpi->sf.search_method == SQUARE) { + var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, + &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); + if (rd && var < var_max) + var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); + } else if (cpi->sf.search_method == BIGDIA) { + var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, + &cpi->fn_ptr[bsize], 1, ref_mv, tmp_mv); + if (rd && var < var_max) + var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, &cpi->fn_ptr[bsize], 1); + } else { + int further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; + + var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, + further_steps, 1, &cpi->fn_ptr[bsize], + ref_mv, tmp_mv); + } + + return var; +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index cff99a6dc..a384a4360 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -129,7 +129,7 @@ typedef enum { ONE_LOOP_REDUCED = 2 } FAST_COEFF_UPDATE; -typedef struct { +typedef struct SPEED_FEATURES { // Frame level coding parameter update int frame_parameter_update; diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index a5234cd9e..6eff20080 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -99,6 +99,7 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1, unsigned int i, j, k; int modifier; int byte = 0; + const int rounding = strength > 0 ? 1 << (strength - 1) : 0; for (i = 0, k = 0; i < block_size; i++) { for (j = 0; j < block_size; j++, k++) { @@ -111,7 +112,7 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1, // modifier = (int)roundf(coeff > 16 ? 0 : 16-coeff); modifier *= modifier; modifier *= 3; - modifier += 1 << (strength - 1); + modifier += rounding; modifier >>= strength; if (modifier > 16) diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index ae3c86aee..91d8ea4dc 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -113,12 +113,9 @@ unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ const uint8_t *b, int b_stride, \ unsigned int *sse) { \ - unsigned int var; \ - int avg; \ -\ - variance(a, a_stride, b, b_stride, W, H, &var, &avg); \ - *sse = var; \ - return var - (((int64_t)avg * avg) / (W * H)); \ + int sum; \ + variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ } #define SUBPIX_VAR(W, H) \ @@ -159,69 +156,36 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ } - -void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); -} - -void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride, - const uint8_t *ref_ptr, int ref_stride, - unsigned int *sse, int *sum) { - variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); -} - -unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg); - *sse = var; - return var; + int sum; + variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum); + return *sse; } -unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg); - *sse = var; - return var; + int sum; + variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum); + return *sse; } -unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg); - *sse = var; - return var; + int sum; + variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum); + return *sse; } -unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, +unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { - unsigned int var; - int avg; - - variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg); - *sse = var; - return var; + int sum; + variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum); + return *sse; } VAR(4, 4) diff --git a/vp9/encoder/x86/vp9_dct32x32_sse2.c b/vp9/encoder/x86/vp9_dct32x32_sse2.c index 2d59775ce..42fdbbdc5 100644 --- a/vp9/encoder/x86/vp9_dct32x32_sse2.c +++ b/vp9/encoder/x86/vp9_dct32x32_sse2.c @@ -12,6 +12,9 @@ #include "vp9/common/vp9_idct.h" // for cospi constants #include "vpx_ports/mem.h" +#define pair_set_epi32(a, b) \ + _mm_set_epi32(b, a, b, a) + #if FDCT32x32_HIGH_PRECISION static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) { __m128i buf0, buf1; |