diff options
author | John Koleszar <jkoleszar@google.com> | 2011-05-11 00:05:07 -0400 |
---|---|---|
committer | John Koleszar <jkoleszar@google.com> | 2011-05-11 00:05:07 -0400 |
commit | 65b1648f35b3212bd8beb024d7c4095f138274d0 (patch) | |
tree | 66650904b86c685d29ad8843276e9a95ca3b337a /vp8/encoder | |
parent | 6edd07d6561522f81b2a09a2a160d74808b14363 (diff) | |
parent | ba6f60dba70ad56fbfd1080bb4555f078bc774bf (diff) | |
download | libvpx-65b1648f35b3212bd8beb024d7c4095f138274d0.tar libvpx-65b1648f35b3212bd8beb024d7c4095f138274d0.tar.gz libvpx-65b1648f35b3212bd8beb024d7c4095f138274d0.tar.bz2 libvpx-65b1648f35b3212bd8beb024d7c4095f138274d0.zip |
Merge remote branch 'internal/upstream' into HEAD
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- | vp8/encoder/encodeframe.c | 1 | ||||
-rw-r--r-- | vp8/encoder/encodeintra.c | 2 | ||||
-rw-r--r-- | vp8/encoder/ethreading.c | 1 | ||||
-rw-r--r-- | vp8/encoder/generic/csystemdependent.c | 1 | ||||
-rw-r--r-- | vp8/encoder/mcomp.c | 181 | ||||
-rw-r--r-- | vp8/encoder/mcomp.h | 24 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 15 | ||||
-rw-r--r-- | vp8/encoder/onyx_int.h | 5 | ||||
-rw-r--r-- | vp8/encoder/rdopt.c | 253 | ||||
-rw-r--r-- | vp8/encoder/x86/mcomp_x86.h | 3 | ||||
-rw-r--r-- | vp8/encoder/x86/x86_csystemdependent.c | 1 |
11 files changed, 323 insertions, 164 deletions
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index f41b1d24c..f21ecbd98 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -883,6 +883,7 @@ void vp8_encode_frame(VP8_COMP *cpi) xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; + x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; } diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 9517a1d89..5d52c9fc4 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -122,7 +122,7 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) vp8_quantize_mbuv(x); - if (x->optimize==2 ||(x->optimize && x->rddiv > 1)) + if (x->optimize) vp8_optimize_mbuv(x, rtcd); vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd); diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index fa7c0ce32..0ae5ba810 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -264,6 +264,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; + x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; if (mb_row == cm->mb_rows - 1) { diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 1d6db40a1..d127ed114 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -90,6 +90,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; cpi->rtcd.search.full_search = vp8_full_search_sad; + cpi->rtcd.search.refining_search = vp8_refining_search_sad; cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; #if !(CONFIG_REALTIME_ONLY) cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index 9d447b210..90e398383 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -1621,6 +1621,187 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er return INT_MAX; } +int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int search_range, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv) +{ + MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}}; + MV tempmv; + int i, j; + short this_row_offset, this_col_offset; + + int what_stride = b->src_stride; + int in_what_stride = d->pre_stride; + unsigned char *what = (*(b->base_src) + b->src); + unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + (ref_mv->row * (d->pre_stride)) + ref_mv->col); + unsigned char *check_here; + unsigned int thissad; + MV this_mv; + unsigned int bestsad = INT_MAX; + + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + MV fcenter_mv; + + fcenter_mv.row = center_mv->row >> 3; + fcenter_mv.col = center_mv->col >> 3; + + bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); + + for (i=0; i<search_range; i++) + { + tempmv.row = ref_mv->row; + tempmv.col = ref_mv->col; + + for (j = 0 ; j < 4 ; j++) + { + this_row_offset = ref_mv->row + neighbors[j].row; + this_col_offset = ref_mv->col + neighbors[j].col; + + if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && + (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) + { + check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address; + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + + if (thissad < bestsad) + { + this_mv.row = this_row_offset; + this_mv.col = this_col_offset; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); + + if (thissad < bestsad) + { + bestsad = thissad; + ref_mv->row = this_row_offset; + ref_mv->col = this_col_offset; + best_address = check_here; + } + } + } + } + + if (tempmv.row == ref_mv->row && tempmv.col == ref_mv->col ) + break; + } + + this_mv.row = ref_mv->row << 3; + this_mv.col = ref_mv->col << 3; + + if (bestsad < INT_MAX) + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) ++ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit); + else + return INT_MAX; +} + +int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int search_range, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv) +{ + MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}}; + MV tempmv; + int i, j; + short this_row_offset, this_col_offset; + + int what_stride = b->src_stride; + int in_what_stride = d->pre_stride; + unsigned char *what = (*(b->base_src) + b->src); + unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + (ref_mv->row * (d->pre_stride)) + ref_mv->col); + unsigned char *check_here; + unsigned int thissad; + MV this_mv; + unsigned int bestsad = INT_MAX; + + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + MV fcenter_mv; + + fcenter_mv.row = center_mv->row >> 3; + fcenter_mv.col = center_mv->col >> 3; + + bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); + + for (i=0; i<search_range; i++) + { + int all_in = 1; + + tempmv.row = ref_mv->row; + tempmv.col = ref_mv->col; + + all_in &= ((ref_mv->row - 1) > x->mv_row_min); + all_in &= ((ref_mv->row + 1) < x->mv_row_max); + all_in &= ((ref_mv->col - 1) > x->mv_col_min); + all_in &= ((ref_mv->col + 1) < x->mv_col_max); + + if(all_in) + { + unsigned int sad_array[4]; + unsigned char *block_offset[4]; + block_offset[0] = best_address - in_what_stride; + block_offset[1] = best_address - 1; + block_offset[2] = best_address + 1; + block_offset[3] = best_address + in_what_stride; + + fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); + + for (j = 0; j < 4; j++) + { + if (sad_array[j] < bestsad) + { + this_mv.row = ref_mv->row + neighbors[j].row; + this_mv.col = ref_mv->col + neighbors[j].col; + sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); + + if (sad_array[j] < bestsad) + { + bestsad = sad_array[j]; + ref_mv->row = this_mv.row; + ref_mv->col = this_mv.col; + best_address = block_offset[j]; + } + } + } + } + else + { + for (j = 0 ; j < 4 ; j++) + { + this_row_offset = ref_mv->row + neighbors[j].row; + this_col_offset = ref_mv->col + neighbors[j].col; + + if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && + (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) + { + check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address; + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + + if (thissad < bestsad) + { + this_mv.row = this_row_offset; + this_mv.col = this_col_offset; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); + + if (thissad < bestsad) + { + bestsad = thissad; + ref_mv->row = this_row_offset; + ref_mv->col = this_col_offset; + best_address = check_here; + } + } + } + } + } + + if (tempmv.row == ref_mv->row && tempmv.col == ref_mv->col ) + break; + } + + this_mv.row = ref_mv->row << 3; + this_mv.col = ref_mv->col << 3; + + if (bestsad < INT_MAX) + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) ++ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit); + else + return INT_MAX; +} + #ifdef ENTROPY_STATS void print_mode_context(void) { diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index b14cbcbc8..d655b837e 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -69,6 +69,20 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step; MV *center_mv \ ) +#define prototype_refining_search_sad(sym)\ + int (sym)\ + (\ + MACROBLOCK *x, \ + BLOCK *b, \ + BLOCKD *d, \ + MV *ref_mv, \ + int error_per_bit, \ + int distance, \ + vp8_variance_fn_ptr_t *fn_ptr, \ + int *mvcost[2], \ + MV *center_mv \ + ) + #define prototype_diamond_search_sad(sym)\ int (sym)\ (\ @@ -94,6 +108,10 @@ extern prototype_full_search_sad(vp8_full_search_sad); extern prototype_full_search_sad(vp8_full_search_sadx3); extern prototype_full_search_sad(vp8_full_search_sadx8); +typedef prototype_refining_search_sad(*vp8_refining_search_fn_t); +extern prototype_refining_search_sad(vp8_refining_search_sad); +extern prototype_refining_search_sad(vp8_refining_search_sadx4); + typedef prototype_diamond_search_sad(*vp8_diamond_search_fn_t); extern prototype_diamond_search_sad(vp8_diamond_search_sad); extern prototype_diamond_search_sad(vp8_diamond_search_sadx4); @@ -103,6 +121,11 @@ extern prototype_diamond_search_sad(vp8_diamond_search_sadx4); #endif extern prototype_full_search_sad(vp8_search_full_search); +#ifndef vp8_search_refining_search +#define vp8_search_refining_search vp8_refining_search_sad +#endif +extern prototype_refining_search_sad(vp8_search_refining_search); + #ifndef vp8_search_diamond_search #define vp8_search_diamond_search vp8_diamond_search_sad #endif @@ -111,6 +134,7 @@ extern prototype_diamond_search_sad(vp8_search_diamond_search); typedef struct { prototype_full_search_sad(*full_search); + prototype_refining_search_sad(*refining_search); prototype_diamond_search_sad(*diamond_search); } vp8_search_rtcd_vtable_t; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index b1e9e25a6..7701d4c16 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -636,9 +636,6 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->max_step_search_steps = MAX_MVSEARCH_STEPS; sf->improved_mv_pred = 1; - cpi->do_full[0] = 0; - cpi->do_full[1] = 0; - // default thresholds to 0 for (i = 0; i < MAX_MODES; i++) sf->thresh_mult[i] = 0; @@ -1298,7 +1295,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) } if (cpi->sf.optimize_coefficients == 1) - cpi->mb.optimize = 1 + cpi->is_next_src_alt_ref; + cpi->mb.optimize = 1; else cpi->mb.optimize = 0; @@ -1809,7 +1806,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) // YX Temp cpi->alt_ref_source = NULL; cpi->is_src_frame_alt_ref = 0; - cpi->is_next_src_alt_ref = 0; + #if 0 // Experimental RD Code @@ -2041,9 +2038,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; } - cpi->check_freq[0] = 15; - cpi->check_freq[1] = 15; - #ifdef OUTPUT_YUV_SRC yuv_file = fopen("bd.yuv", "ab"); #endif @@ -2149,6 +2143,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search); cpi->diamond_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, diamond_search); + cpi->refining_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, refining_search); cpi->ready_for_new_frame = 1; @@ -4738,7 +4733,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon cm->show_frame = 0; cpi->source_alt_ref_pending = FALSE; // Clear Pending altf Ref flag. cpi->is_src_frame_alt_ref = 0; - cpi->is_next_src_alt_ref = 0; } } #endif @@ -4752,9 +4746,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon cpi->is_src_frame_alt_ref = cpi->alt_ref_source && (cpi->source == cpi->alt_ref_source); - cpi->is_next_src_alt_ref = cpi->alt_ref_source - && (vp8_lookahead_peek(cpi->lookahead, 0) - == cpi->alt_ref_source); if(cpi->is_src_frame_alt_ref) cpi->alt_ref_source = NULL; } diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index f4ba7eb18..63505d5b7 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -289,7 +289,6 @@ typedef struct int source_alt_ref_active; // an alt ref frame has been encoded and is usable int is_src_frame_alt_ref; // source of frame to encode is an exact copy of an alt ref frame - int is_next_src_alt_ref; // source of next frame to encode is an exact copy of an alt ref frame int gold_is_last; // golden frame same as last frame ( short circuit gold searches) int alt_is_last; // Alt reference frame same as last ( short circuit altref search) @@ -315,9 +314,6 @@ typedef struct unsigned int mode_chosen_counts[MAX_MODES]; unsigned int mbs_tested_so_far; - unsigned int check_freq[2]; - unsigned int do_full[2]; - int rd_thresh_mult[MAX_MODES]; int rd_baseline_thresh[MAX_MODES]; int rd_threshes[MAX_MODES]; @@ -603,6 +599,7 @@ typedef struct fractional_mv_step_fp *find_fractional_mv_step; vp8_full_search_fn_t full_search_sad; + vp8_refining_search_fn_t refining_search_sad; vp8_diamond_search_fn_t diamond_search_sad; vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS]; unsigned int time_receive_data; diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index bb38353ef..76a67cf76 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -2069,168 +2069,138 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int break; case NEWMV: + { + int thissme; + int bestsme = INT_MAX; + int step_param = cpi->sf.first_step; + int further_steps; + int n; + int do_refine=1; /* If last step (1-away) of n-step search doesn't pick the center point as the best match, + we will do a final 1-away diamond refining search */ - // Decrement full search counter - if (cpi->check_freq[lf_or_gf] > 0) - cpi->check_freq[lf_or_gf] --; + int sadpb = x->sadperbit16; - { - int thissme; - int bestsme = INT_MAX; - int step_param = cpi->sf.first_step; - int search_range; - int further_steps; - int n; + int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3; + int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3; + int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3; + int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3; - int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3; - int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3; - int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3; - int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3; - - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; - - // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. - if (x->mv_col_min < col_min ) - x->mv_col_min = col_min; - if (x->mv_col_max > col_max ) - x->mv_col_max = col_max; - if (x->mv_row_min < row_min ) - x->mv_row_min = row_min; - if (x->mv_row_max > row_max ) - x->mv_row_max = row_max; - - //adjust search range according to sr from mv prediction - if(sr > step_param) - step_param = sr; - - // Work out how long a search we should do - search_range = MAXF(abs(best_ref_mv.col), abs(best_ref_mv.row)) >> 3; - - if (search_range >= x->vector_range) - x->vector_range = search_range; - else if (x->vector_range > cpi->sf.min_fs_radius) - x->vector_range--; - - // Initial step/diamond search - { - int sadpb = x->sadperbit16; - - if (cpi->sf.search_method == HEX) - { - bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); - mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; - mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; - } - else - { - bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb < 9 - mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; - mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; - // Further step/diamond searches as necessary - n = 0; - further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; + // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search. + if (x->mv_col_min < col_min ) + x->mv_col_min = col_min; + if (x->mv_col_max > col_max ) + x->mv_col_max = col_max; + if (x->mv_row_min < row_min ) + x->mv_row_min = row_min; + if (x->mv_row_max > row_max ) + x->mv_row_max = row_max; - n = num00; - num00 = 0; + //adjust search range according to sr from mv prediction + if(sr > step_param) + step_param = sr; - while (n < further_steps) - { - n++; + // Initial step/diamond search + if (cpi->sf.search_method == HEX) + { + bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); + mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; + mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; + } + else + { + bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb < 9 + mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; + mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; - if (num00) - num00--; - else - { - thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb = 9 + // Further step/diamond searches as necessary + n = 0; + further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - if (thissme < bestsme) - { - bestsme = thissme; - mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; - mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; - } - else - { - d->bmi.mv.as_mv.row = mode_mv[NEWMV].row; - d->bmi.mv.as_mv.col = mode_mv[NEWMV].col; - } - } - } - } + n = num00; + num00 = 0; - } + /* If there won't be more n-step search, check to see if refining search is needed. */ + if (n > further_steps) + do_refine = 0; - // Should we do a full search - if (!cpi->check_freq[lf_or_gf] || cpi->do_full[lf_or_gf]) + while (n < further_steps) { - int thissme; - int full_flag_thresh = 0; + n++; - // Update x->vector_range based on best vector found in step search - search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col)); - //search_range *= 1.4; //didn't improve PSNR - - if (search_range > x->vector_range) - x->vector_range = search_range; + if (num00) + num00--; else - search_range = x->vector_range; - - // Apply limits - search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range; - - //add this to reduce full search range. - if(sr<=3 && search_range > 8) search_range = 8; - { - int sadpb = x->sadperbit16 >> 2; - /* use diamond search result as full search staring point */ - thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); - } + thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb = 9 - // Barrier threshold to initiating full search - // full_flag_thresh = 10 + (thissme >> 7); - if ((thissme + full_flag_thresh) < bestsme) - { - cpi->do_full[lf_or_gf] ++; - bestsme = thissme; - } - else if (thissme < bestsme) - bestsme = thissme; - else - { - cpi->do_full[lf_or_gf] = cpi->do_full[lf_or_gf] >> 1; - cpi->check_freq[lf_or_gf] = cpi->sf.full_freq[lf_or_gf]; + /* check to see if refining search is needed. */ + if (num00 > (further_steps-n)) + do_refine = 0; - // The full search result is actually worse so re-instate the previous best vector - d->bmi.mv.as_mv.row = mode_mv[NEWMV].row; - d->bmi.mv.as_mv.col = mode_mv[NEWMV].col; + if (thissme < bestsme) + { + bestsme = thissme; + mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; + mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; + } + else + { + d->bmi.mv.as_mv.row = mode_mv[NEWMV].row; + d->bmi.mv.as_mv.col = mode_mv[NEWMV].col; + } } } + } + + /* final 1-away diamond refining search */ + if (do_refine == 1) + { + int search_range; - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; + //It seems not a good way to set search_range. Need further investigation. + //search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col)); + search_range = 8; - if (bestsme < INT_MAX) + //thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); + thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb/4, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); + + if (thissme < bestsme) { - int dis; /* TODO: use dis in distortion calculation later. */ - unsigned int sse; - cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse); + bestsme = thissme; + mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; + mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; } + else + { + d->bmi.mv.as_mv.row = mode_mv[NEWMV].row; + d->bmi.mv.as_mv.col = mode_mv[NEWMV].col; + } + } - mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; - mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; - - // Add the new motion vector cost to our rolling cost variable - rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; + if (bestsme < INT_MAX) + { + int dis; /* TODO: use dis in distortion calculation later. */ + unsigned int sse; + cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse); } + mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; + mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; + + // Add the new motion vector cost to our rolling cost variable + rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); + } + case NEARESTMV: case NEARMV: @@ -2466,17 +2436,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } - // If we have chosen new mv or split then decay the full search check count more quickly. - if ((vp8_mode_order[best_mode_index] == NEWMV) || (vp8_mode_order[best_mode_index] == SPLITMV)) - { - int lf_or_gf = (vp8_ref_frame_order[best_mode_index] == LAST_FRAME) ? 0 : 1; - - if (cpi->check_freq[lf_or_gf] && !cpi->do_full[lf_or_gf]) - { - cpi->check_freq[lf_or_gf] --; - } - } - // Keep a record of best mode index that we chose cpi->last_best_mode_index = best_mode_index; diff --git a/vp8/encoder/x86/mcomp_x86.h b/vp8/encoder/x86/mcomp_x86.h index 3b7b29c21..efe7241f2 100644 --- a/vp8/encoder/x86/mcomp_x86.h +++ b/vp8/encoder/x86/mcomp_x86.h @@ -18,6 +18,9 @@ #undef vp8_search_full_search #define vp8_search_full_search vp8_full_search_sadx3 +#undef vp8_search_refining_search +#define vp8_search_refining_search vp8_refining_search_sadx4 + #undef vp8_search_diamond_search #define vp8_search_diamond_search vp8_diamond_search_sadx4 diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index c1fae468a..f65ef8a5f 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -272,6 +272,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_sse3; cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3; cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4; + cpi->rtcd.search.refining_search = vp8_refining_search_sadx4; } #endif |