diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 66 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_mbgraph.c | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 283 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.c | 20 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 101 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 3 |
11 files changed, 299 insertions, 205 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 8464882ea..88b67cf5e 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1445,40 +1445,6 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, vp9_extend_frame_borders(dst); } -#define WRITE_RECON_BUFFER 0 -#if WRITE_RECON_BUFFER -void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { - FILE *yframe; - int i; - char filename[255]; - - snprintf(filename, sizeof(filename), "cx\\y%04d.raw", this_frame); - yframe = fopen(filename, "wb"); - - for (i = 0; i < frame->y_height; i++) - fwrite(frame->y_buffer + i * frame->y_stride, - frame->y_width, 1, yframe); - - fclose(yframe); - snprintf(filename, sizeof(filename), "cx\\u%04d.raw", this_frame); - yframe = fopen(filename, "wb"); - - for (i = 0; i < frame->uv_height; i++) - fwrite(frame->u_buffer + i * frame->uv_stride, - frame->uv_width, 1, yframe); - - fclose(yframe); - snprintf(filename, sizeof(filename), "cx\\v%04d.raw", this_frame); - yframe = fopen(filename, "wb"); - - for (i = 0; i < frame->uv_height; i++) - fwrite(frame->v_buffer + i * frame->uv_stride, - frame->uv_width, 1, yframe); - - fclose(yframe); -} -#endif - // Function to test for conditions that indicate we should loop // back and recode a frame. static int recode_loop_test(const VP9_COMP *cpi, @@ -2013,18 +1979,16 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, } } -static void configure_skippable_frame(VP9_COMP *cpi) { +static int is_skippable_frame(const VP9_COMP *cpi) { // If the current frame does not have non-zero motion vector detected in the // first pass, and so do its previous and forward frames, then this frame // can be skipped for partition check, and the partition size is assigned // according to the variance + const SVC *const svc = &cpi->svc; + const TWO_PASS *const twopass = is_spatial_svc(cpi) ? + &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass; - SVC *const svc = &cpi->svc; - TWO_PASS *const twopass = is_spatial_svc(cpi) ? - &svc->layer_context[svc->spatial_layer_id].twopass - : &cpi->twopass; - - cpi->skippable_frame = (!frame_is_intra_only(&cpi->common) && + return (!frame_is_intra_only(&cpi->common) && twopass->stats_in - 2 > twopass->stats_in_start && twopass->stats_in < twopass->stats_in_end && (twopass->stats_in - 1)->pcnt_inter - (twopass->stats_in - 1)->pcnt_motion @@ -2198,7 +2162,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // second pass according to the first pass stats if (oxcf->pass == 2 && (!cpi->use_svc || is_spatial_svc(cpi))) { - configure_skippable_frame(cpi); + cpi->skippable_frame = is_skippable_frame(cpi); } // For 1 pass CBR, check if we are dropping this frame. @@ -2280,27 +2244,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, cm->frame_to_show = get_frame_new_buffer(cm); -#if WRITE_RECON_BUFFER - if (cm->show_frame) - write_cx_frame_to_file(cm->frame_to_show, - cm->current_video_frame); - else - write_cx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 1000); -#endif - // Pick the loop filter level for the frame. loopfilter_frame(cpi, cm); -#if WRITE_RECON_BUFFER - if (cm->show_frame) - write_cx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 2000); - else - write_cx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 3000); -#endif - // build the bitstream cpi->dummy_packing = 0; vp9_pack_bitstream(cpi, dest, size); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 2dba67c54..ace673457 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -523,6 +523,10 @@ static INLINE int get_chessboard_index(const int frame_index) { return frame_index & 0x1; } +static INLINE int *cond_sad_list(const struct VP9_COMP *cpi, int *sad_list) { + return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL; +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 6e04e2a9c..b8e716492 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -34,6 +34,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const int tmp_row_min = x->mv_row_min; const int tmp_row_max = x->mv_row_max; MV ref_full; + int sad_list[5]; // Further step/diamond searches as necessary int step_param = mv_sf->reduce_first_step_size; @@ -45,8 +46,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, ref_full.row = ref_mv->row >> 3; /*cpi->sf.search_method == HEX*/ - vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0, &v_fn_ptr, 0, - ref_mv, dst_mv); + vp9_hex_search(x, &ref_full, step_param, x->errorperbit, 0, + cond_sad_list(cpi, sad_list), + &v_fn_ptr, 0, ref_mv, dst_mv); // Try sub-pixel MC // if (bestsme > error_thresh && bestsme < INT_MAX) @@ -55,8 +57,10 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, unsigned int sse; cpi->find_fractional_mv_step( x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, - &v_fn_ptr, 0, mv_sf->subpel_iters_per_step, NULL, NULL, &distortion, - &sse, NULL, 0, 0); + &v_fn_ptr, 0, mv_sf->subpel_iters_per_step, + cond_sad_list(cpi, sad_list), + NULL, NULL, + &distortion, &sse, NULL, 0, 0); } xd->mi[0]->mbmi.mode = NEWMV; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index ae924d596..d6f6b2563 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -256,6 +256,137 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { } \ } +#define SETUP_SUBPEL_SEARCH \ + const uint8_t *const z = x->plane[0].src.buf; \ + const int src_stride = x->plane[0].src.stride; \ + const MACROBLOCKD *xd = &x->e_mbd; \ + unsigned int besterr = INT_MAX; \ + unsigned int sse; \ + unsigned int whichdir; \ + int thismse; \ + const unsigned int halfiters = iters_per_step; \ + const unsigned int quarteriters = iters_per_step; \ + const unsigned int eighthiters = iters_per_step; \ + const int y_stride = xd->plane[0].pre[0].stride; \ + const int offset = bestmv->row * y_stride + bestmv->col; \ + const uint8_t *const y = xd->plane[0].pre[0].buf; \ + \ + int rr = ref_mv->row; \ + int rc = ref_mv->col; \ + int br = bestmv->row * 8; \ + int bc = bestmv->col * 8; \ + int hstep = 4; \ + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); \ + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); \ + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); \ + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); \ + int tr = br; \ + int tc = bc; \ + \ + bestmv->row *= 8; \ + bestmv->col *= 8; \ + if (second_pred != NULL) { \ + DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \ + vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \ + besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \ + } else { \ + besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \ + } \ + *distortion = besterr; \ + besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); + +int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x, + MV *bestmv, const MV *ref_mv, + int allow_hp, + int error_per_bit, + const vp9_variance_fn_ptr_t *vfp, + int forced_stop, + int iters_per_step, + int *sad_list, + int *mvjcost, int *mvcost[2], + int *distortion, + unsigned int *sse1, + const uint8_t *second_pred, + int w, int h) { + SETUP_SUBPEL_SEARCH; + + if (sad_list && + sad_list[0] != INT_MAX && sad_list[1] != INT_MAX && + sad_list[2] != INT_MAX && sad_list[3] != INT_MAX && + sad_list[4] != INT_MAX) { + unsigned int left, right, up, down, diag; + whichdir = (sad_list[1] < sad_list[3] ? 0 : 1) + + (sad_list[2] < sad_list[4] ? 0 : 2); + switch (whichdir) { + case 0: + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(diag, tr - hstep, tc - hstep); + break; + case 1: + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(up, tr - hstep, tc); + CHECK_BETTER(diag, tr - hstep, tc + hstep); + break; + case 2: + CHECK_BETTER(left, tr, tc - hstep); + CHECK_BETTER(down, tr + hstep, tc); + CHECK_BETTER(diag, tr + hstep, tc - hstep); + break; + case 3: + CHECK_BETTER(right, tr, tc + hstep); + CHECK_BETTER(down, tr + hstep, tc); + CHECK_BETTER(diag, tr + hstep, tc + hstep); + break; + } + } else { + FIRST_LEVEL_CHECKS; + if (halfiters > 1) { + SECOND_LEVEL_CHECKS; + } + } + + tr = br; + tc = bc; + + // Each subsequent iteration checks at least one point in common with + // the last iteration could be 2 ( if diag selected) 1/4 pel + + // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only + if (forced_stop != 2) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (quarteriters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + + if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { + hstep >>= 1; + FIRST_LEVEL_CHECKS; + if (eighthiters > 1) { + SECOND_LEVEL_CHECKS; + } + tr = br; + tc = bc; + } + // These lines insure static analysis doesn't warn that + // tr and tc aren't used after the above point. + (void) tr; + (void) tc; + + bestmv->row = br; + bestmv->col = bc; + + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) + return INT_MAX; + + return besterr; +} + int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, @@ -263,55 +394,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, + int *sad_list, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, const uint8_t *second_pred, int w, int h) { - const uint8_t *const z = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - const MACROBLOCKD *xd = &x->e_mbd; - unsigned int besterr = INT_MAX; - unsigned int sse; - unsigned int whichdir; - int thismse; - const unsigned int halfiters = iters_per_step; - const unsigned int quarteriters = iters_per_step; - const unsigned int eighthiters = iters_per_step; - - const int y_stride = xd->plane[0].pre[0].stride; - const int offset = bestmv->row * y_stride + bestmv->col; - const uint8_t *const y = xd->plane[0].pre[0].buf; - - int rr = ref_mv->row; - int rc = ref_mv->col; - int br = bestmv->row * 8; - int bc = bestmv->col * 8; - int hstep = 4; - const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); - const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); - const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); - const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); - - int tr = br; - int tc = bc; - - // central mv - bestmv->row *= 8; - bestmv->col *= 8; - - // calculate central point error - // TODO(yunqingwang): central pointer error was already calculated in full- - // pixel search, and can be passed in this function. - if (second_pred != NULL) { - DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); - vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); - besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); - } else { - besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); - } - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); + SETUP_SUBPEL_SEARCH; + (void) sad_list; // to silence compiler warning // Each subsequent iteration checks at least one point in // common with the last iteration could be 2 ( if diag selected) @@ -398,14 +488,17 @@ static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) { // Each scale can have a different number of candidates and shape of // candidates as indicated in the num_candidates and candidates arrays // passed into this function +// static int vp9_pattern_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, - int do_init_search, int do_refine, + int do_init_search, + int *sad_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, - const MV *center_mv, MV *best_mv, + const MV *center_mv, + MV *best_mv, const int num_candidates[MAX_PATTERN_SCALES], const MV candidates[MAX_PATTERN_SCALES] [MAX_PATTERN_CANDIDATES]) { @@ -413,7 +506,7 @@ static int vp9_pattern_search(const MACROBLOCK *x, static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, }; - int i, j, s, t; + int i, s, t; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; int br, bc; @@ -552,47 +645,38 @@ static int vp9_pattern_search(const MACROBLOCK *x, } while (s--); } - // Check 4 1-away neighbors if do_refine is true. - // For most well-designed schemes do_refine will not be necessary. - if (do_refine) { - static const MV neighbors[4] = {{0, -1}, { -1, 0}, {1, 0}, {0, 1}}; - - for (j = 0; j < 16; j++) { - int best_site = -1; - if (check_bounds(x, br, bc, 1)) { - for (i = 0; i < 4; i++) { - const MV this_mv = {br + neighbors[i].row, - bc + neighbors[i].col}; - thissad = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), - in_what->stride); - CHECK_BETTER - } - } else { - for (i = 0; i < 4; i++) { - const MV this_mv = {br + neighbors[i].row, - bc + neighbors[i].col}; - if (!is_mv_in(x, &this_mv)) - continue; - thissad = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), - in_what->stride); - CHECK_BETTER - } + // Returns the one-away integer pel sad values around the best as follows: + // sad_list[0]: sad at the best integer pel + // sad_list[1]: sad at delta {0, -1} (left) from the best integer pel + // sad_list[2]: sad at delta {-1, 0} (top) from the best integer pel + // sad_list[3]: sad at delta { 0, 1} (right) from the best integer pel + // sad_list[4]: sad at delta { 1, 0} (bottom) from the best integer pel + if (sad_list) { + static const MV neighbors[4] = {{0, -1}, {-1, 0}, {0, 1}, {1, 0}}; + sad_list[0] = bestsad; + if (check_bounds(x, br, bc, 1)) { + for (i = 0; i < 4; i++) { + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; + sad_list[i + 1] = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); } - - if (best_site == -1) { - break; - } else { - br += neighbors[best_site].row; - bc += neighbors[best_site].col; + } else { + for (i = 0; i < 4; i++) { + const MV this_mv = {br + neighbors[i].row, + bc + neighbors[i].col}; + if (!is_mv_in(x, &this_mv)) + sad_list[i + 1] = INT_MAX; + else + sad_list[i + 1] = vfp->sdf(what->buf, what->stride, + get_buf_from_mv(in_what, &this_mv), + in_what->stride); } } } - best_mv->row = br; best_mv->col = bc; - return bestsad; } @@ -634,6 +718,7 @@ int vp9_hex_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, + int *sad_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { @@ -658,7 +743,7 @@ int vp9_hex_search(const MACROBLOCK *x, { -1024, 0}}, }; return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, - do_init_search, 0, vfp, use_mvcost, + do_init_search, sad_list, vfp, use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates); } @@ -668,6 +753,7 @@ int vp9_bigdia_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, + int *sad_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, @@ -699,7 +785,7 @@ int vp9_bigdia_search(const MACROBLOCK *x, {-512, 512}, {-1024, 0}}, }; return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, - do_init_search, 0, vfp, use_mvcost, + do_init_search, sad_list, vfp, use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates); } @@ -709,6 +795,7 @@ int vp9_square_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, + int *sad_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, @@ -740,7 +827,7 @@ int vp9_square_search(const MACROBLOCK *x, {0, 1024}, {-1024, 1024}, {-1024, 0}}, }; return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, - do_init_search, 0, vfp, use_mvcost, + do_init_search, sad_list, vfp, use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates); } @@ -750,12 +837,13 @@ int vp9_fast_hex_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, // must be zero for fast_hex + int *sad_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { return vp9_hex_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), - sad_per_bit, do_init_search, vfp, use_mvcost, + sad_per_bit, do_init_search, sad_list, vfp, use_mvcost, center_mv, best_mv); } @@ -764,13 +852,14 @@ int vp9_fast_dia_search(const MACROBLOCK *x, int search_param, int sad_per_bit, int do_init_search, + int *sad_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { return vp9_bigdia_search(x, ref_mv, MAX(MAX_MVSEARCH_STEPS - 2, search_param), - sad_per_bit, do_init_search, vfp, use_mvcost, - center_mv, best_mv); + sad_per_bit, do_init_search, sad_list, vfp, + use_mvcost, center_mv, best_mv); } #undef CHECK_BETTER @@ -1368,33 +1457,41 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int error_per_bit, + int *sad_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd) { const SPEED_FEATURES *const sf = &cpi->sf; const SEARCH_METHODS method = sf->mv.search_method; vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; int var = 0; + if (sad_list) { + sad_list[0] = INT_MAX; + sad_list[1] = INT_MAX; + sad_list[2] = INT_MAX; + sad_list[3] = INT_MAX; + sad_list[4] = INT_MAX; + } switch (method) { case FAST_DIAMOND: var = vp9_fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, - fn_ptr, 1, ref_mv, tmp_mv); + sad_list, fn_ptr, 1, ref_mv, tmp_mv); break; case FAST_HEX: var = vp9_fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, - fn_ptr, 1, ref_mv, tmp_mv); + sad_list, fn_ptr, 1, ref_mv, tmp_mv); break; case HEX: var = vp9_hex_search(x, mvp_full, step_param, error_per_bit, 1, - fn_ptr, 1, ref_mv, tmp_mv); + sad_list, fn_ptr, 1, ref_mv, tmp_mv); break; case SQUARE: var = vp9_square_search(x, mvp_full, step_param, error_per_bit, 1, - fn_ptr, 1, ref_mv, tmp_mv); + sad_list, fn_ptr, 1, ref_mv, tmp_mv); break; case BIGDIA: var = vp9_bigdia_search(x, mvp_full, step_param, error_per_bit, 1, - fn_ptr, 1, ref_mv, tmp_mv); + sad_list, fn_ptr, 1, ref_mv, tmp_mv); break; case NSTEP: var = vp9_full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 298fbb6c9..9b4734a6f 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -79,6 +79,7 @@ typedef int (integer_mv_pattern_search_fn) ( int search_param, int error_per_bit, int do_init_search, + int *sad_list, const vp9_variance_fn_ptr_t *vf, int use_mvcost, const MV *center_mv, @@ -98,12 +99,14 @@ typedef int (fractional_mv_step_fp) ( const vp9_variance_fn_ptr_t *vfp, int forced_stop, // 0 - full, 1 - qtr only, 2 - half only int iters_per_step, + int *sad_list, int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, const uint8_t *second_pred, int w, int h); extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree; +extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned; typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, @@ -136,8 +139,10 @@ struct VP9_COMP; int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int error_per_bit, + int *sad_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 7a7bb2824..28d8302fa 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -126,6 +126,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const int tmp_row_min = x->mv_row_min; const int tmp_row_max = x->mv_row_max; int rv = 0; + int sad_list[5]; const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); if (cpi->common.show_frame && @@ -152,8 +153,9 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.col >>= 3; mvp_full.row >>= 3; - vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, &ref_mv, - &tmp_mv->as_mv, INT_MAX, 0); + vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, + cond_sad_list(cpi, sad_list), + &ref_mv, &tmp_mv->as_mv, INT_MAX, 0); x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; @@ -179,6 +181,7 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, + cond_sad_list(cpi, sad_list), x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); x->pred_mv[ref] = tmp_mv->as_mv; diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 2841efabe..b9e44088d 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -364,18 +364,15 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, int ref_frame, BLOCK_SIZE block_size) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - int_mv this_mv; int i; int zero_seen = 0; int best_index = 0; int best_sad = INT_MAX; int this_sad = INT_MAX; int max_mv = 0; - uint8_t *src_y_ptr = x->plane[0].src.buf; uint8_t *ref_y_ptr; - int row_offset, col_offset; - int num_mv_refs = MAX_MV_REF_CANDIDATES + + const int num_mv_refs = MAX_MV_REF_CANDIDATES + (cpi->sf.adaptive_motion_search && cpi->common.show_frame && block_size < cpi->sf.max_partition_size); @@ -387,19 +384,16 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, // Get the sad for each candidate reference mv. for (i = 0; i < num_mv_refs; ++i) { - this_mv.as_mv = pred_mv[i]; + const MV *this_mv = &pred_mv[i]; - max_mv = MAX(max_mv, - MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3); - // Only need to check zero mv once. - if (!this_mv.as_int && zero_seen) + max_mv = MAX(max_mv, MAX(abs(this_mv->row), abs(this_mv->col)) >> 3); + if (is_zero_mv(this_mv) && zero_seen) continue; - zero_seen = zero_seen || !this_mv.as_int; + zero_seen |= is_zero_mv(this_mv); - row_offset = this_mv.as_mv.row >> 3; - col_offset = this_mv.as_mv.col >> 3; - ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset; + ref_y_ptr = + &ref_y_buffer[ref_y_stride * (this_mv->row >> 3) + (this_mv->col >> 3)]; // Find sad for current vector. this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 5f1b0a515..2efa3db52 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -171,19 +171,43 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, int64_t dist_sum = 0; const int ref = xd->mi[0]->mbmi.ref_frame[0]; unsigned int sse; + unsigned int var = 0; const int shift = 8; + int rate; + int64_t dist; + + x->pred_sse[ref] = 0; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); - - const unsigned int var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, - &sse); + const TX_SIZE max_tx_size = max_txsize_lookup[bs]; + const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size]; + int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]); + int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]); + int idx, idy; + int lw = b_width_log2_lookup[unit_size] + 2; + int lh = b_height_log2_lookup[unit_size] + 2; + + x->bsse[i] = 0; + + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw); + uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh); + + var += cpi->fn_ptr[unit_size].vf(src , p->src.stride, + dst, pd->dst.stride, &sse); + + x->bsse[i] += sse; + if (i == 0) + x->pred_sse[ref] += sse; + } + } if (!x->select_tx_size) { - if (sse < p->quant_thred[0] >> shift) + if (x->bsse[i] < p->quant_thred[0] >> shift) x->skip_txfm[i] = 1; else if (var < p->quant_thred[1] >> shift) x->skip_txfm[i] = 2; @@ -191,10 +215,6 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, x->skip_txfm[i] = 0; } - x->bsse[i] = sse; - if (i == 0) - x->pred_sse[ref] = sse; - // Fast approximate the modelling function. if (cpi->oxcf.speed > 4) { int64_t rate; @@ -210,9 +230,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, rate_sum += rate; dist_sum += dist; } else { - int rate; - int64_t dist; - vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], + vp9_model_rd_from_var_lapndz(x->bsse[i], 1 << num_pels_log2_lookup[bs], pd->dequant[1] >> 3, &rate, &dist); rate_sum += rate; dist_sum += dist; @@ -1360,6 +1378,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, int sadpb = x->sadperbit4; MV mvp_full; int max_mv; + int sad_list[5]; /* Is the best so far sufficiently good that we cant justify doing * and new motion search. */ @@ -1403,9 +1422,11 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv); - bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, - sadpb, &bsi->ref_mv[0]->as_mv, new_mv, - INT_MAX, 1); + bestsme = vp9_full_pixel_search( + cpi, x, bsize, &mvp_full, step_param, sadpb, + cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL, + &bsi->ref_mv[0]->as_mv, new_mv, + INT_MAX, 1); // Should we do a full search (best quality only) if (cpi->oxcf.mode == BEST) { @@ -1417,6 +1438,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, sadpb, 16, &cpi->fn_ptr[bsize], &bsi->ref_mv[0]->as_mv, &best_mv->as_mv); + sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX; if (thissme < bestsme) { bestsme = thissme; *new_mv = best_mv->as_mv; @@ -1429,17 +1451,19 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int distortion; - cpi->find_fractional_mv_step(x, - new_mv, - &bsi->ref_mv[0]->as_mv, - cm->allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - x->nmvjointcost, x->mvcost, - &distortion, - &x->pred_sse[mbmi->ref_frame[0]], - NULL, 0, 0); + cpi->find_fractional_mv_step( + x, + new_mv, + &bsi->ref_mv[0]->as_mv, + cm->allow_high_precision_mv, + x->errorperbit, &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, + cond_sad_list(cpi, sad_list), + x->nmvjointcost, x->mvcost, + &distortion, + &x->pred_sse[mbmi->ref_frame[0]], + NULL, 0, 0); // save motion search result for use in compound prediction seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv; @@ -1767,6 +1791,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int tmp_col_max = x->mv_col_max; int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; + int sad_list[5]; const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); @@ -1839,6 +1864,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.row >>= 3; bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb, + cond_sad_list(cpi, sad_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); x->mv_col_min = tmp_col_min; @@ -1854,6 +1880,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, + cond_sad_list(cpi, sad_list), x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); } @@ -1978,6 +2005,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x->errorperbit, &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, + NULL, x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, pw, ph); @@ -2130,7 +2158,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64); int pred_exists = 0; int intpel_mv; - int64_t rd, best_rd = INT64_MAX; + int64_t rd, tmp_rd, best_rd = INT64_MAX; int best_needs_copy = 0; uint8_t *orig_dst[MAX_MB_PLANE]; int orig_dst_stride[MAX_MB_PLANE]; @@ -2220,6 +2248,10 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, * if the first is known */ *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]); + if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd && + mbmi->mode != NEARESTMV) + return INT64_MAX; + pred_exists = 0; // Are all MVs integer pel for Y and UV intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv); @@ -2315,6 +2347,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (cm->interp_filter != SWITCHABLE && cm->interp_filter == mbmi->interp_filter)) { pred_exists = 1; + tmp_rd = best_rd; } } restore_dst_buf(xd, orig_dst, orig_dst_stride); @@ -2333,17 +2366,19 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, xd->plane[i].dst.stride = 64; } } + rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0); } else { + int tmp_rate; + int64_t tmp_dist; // Handles the special case when a filter that is not in the - // switchable list (ex. bilinear, 6-tap) is indicated at the frame level + // switchable list (ex. bilinear) is indicated at the frame level, or + // skip condition holds. vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist); + rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); } if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { - int tmp_rate; - int64_t tmp_dist; - model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist); - rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); // if current pred_error modeled rd is substantially more than the best // so far, do not bother doing full rd if (rd / 2 > ref_best_rd) { @@ -2353,7 +2388,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (cm->interp_filter == SWITCHABLE) - *rate2 += vp9_get_switchable_rate(cpi); + *rate2 += rs; if (!is_comp_pred) { if (cpi->allow_encode_breakout) diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 879c83c08..0afcde535 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -436,6 +436,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) { if (sf->mv.subpel_search_method == SUBPEL_TREE) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree; + } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) { + cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned; } cpi->mb.optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index e2e5c1e99..46eedc147 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -40,6 +40,7 @@ typedef enum { typedef enum { SUBPEL_TREE = 0, + SUBPEL_TREE_PRUNED = 1, // Other methods to come } SUBPEL_SEARCH_METHODS; diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 076d77683..045e3590a 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -145,6 +145,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, int bestsme = INT_MAX; int distortion; unsigned int sse; + int sad_list[5]; MV best_ref_mv1 = {0, 0}; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ @@ -168,6 +169,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, // Ignore mv costing by sending NULL pointer instead of cost arrays vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1, + cond_sad_list(cpi, sad_list), &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv); // Ignore mv costing by sending NULL pointer instead of cost array @@ -177,6 +179,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step, + cond_sad_list(cpi, sad_list), NULL, NULL, &distortion, &sse, NULL, 0, 0); |