diff options
Diffstat (limited to 'vp8/encoder')
-rw-r--r-- | vp8/encoder/block.h | 2 | ||||
-rw-r--r-- | vp8/encoder/encodeframe.c | 2 | ||||
-rw-r--r-- | vp8/encoder/encodemv.c | 31 | ||||
-rw-r--r-- | vp8/encoder/encodemv.h | 2 | ||||
-rw-r--r-- | vp8/encoder/ethreading.c | 4 | ||||
-rw-r--r-- | vp8/encoder/firstpass.c | 6 | ||||
-rw-r--r-- | vp8/encoder/mcomp.c | 111 | ||||
-rw-r--r-- | vp8/encoder/mcomp.h | 2 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 8 | ||||
-rw-r--r-- | vp8/encoder/pickinter.c | 4 | ||||
-rw-r--r-- | vp8/encoder/ratectrl.c | 2 | ||||
-rw-r--r-- | vp8/encoder/rdopt.c | 12 | ||||
-rw-r--r-- | vp8/encoder/temporal_filter.c | 65 | ||||
-rw-r--r-- | vp8/encoder/x86/quantize_sse2.asm | 94 |
14 files changed, 143 insertions, 202 deletions
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 2fd67822b..5a2568dde 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -86,7 +86,7 @@ typedef struct int mvcosts[2][MVvals+1]; int *mvcost[2]; - int mvsadcosts[2][MVvals+1]; + int mvsadcosts[2][MVfpvals+1]; int *mvsadcost[2]; int mbmode_cost[2][MB_MODE_COUNT]; int intra_uv_mode_cost[2][MB_MODE_COUNT]; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 3d280005d..ac0f93790 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -1117,7 +1117,7 @@ void vp8_encode_frame(VP8_COMP *cpi) } if (flag[0] || flag[1]) - vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag); + vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); } #endif diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c index 6b1e6f965..a4849c654 100644 --- a/vp8/encoder/encodemv.c +++ b/vp8/encoder/encodemv.c @@ -134,31 +134,14 @@ static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) return cost; // + vp8_cost_bit( p [MVPsign], v < 0); } -//#define M_LOG2_E 0.693147180559945309417 -//#define log2f(x) (log (x) / (float) M_LOG2_E) -void vp8_build_component_cost_table(int *mvcost[2], int *mvsadcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]) +void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]) { int i = 1; //-mv_max; unsigned int cost0 = 0; unsigned int cost1 = 0; vp8_clear_system_state(); -#if 0 - mvsadcost [0] [0] = 300; - mvsadcost [1] [0] = 300; - - do - { - double z = 256 * (2 * (log2f(2 * i) + .6)); - mvsadcost [0][i] = (int) z; - mvsadcost [1][i] = (int) z; - mvsadcost [0][-i] = (int) z; - mvsadcost [1][-i] = (int) z; - } - while (++i <= mv_max); - -#endif i = 1; @@ -193,16 +176,6 @@ void vp8_build_component_cost_table(int *mvcost[2], int *mvsadcost[2], const MV_ } while (++i <= mv_max); } - - /* - i=-mv_max; - do - { - mvcost [0] [i] = cost_mvcomponent( i, mvc[0]); - mvcost [1] [i] = cost_mvcomponent( i, mvc[1]); - } - while( ++i <= mv_max); - */ } @@ -436,7 +409,7 @@ void vp8_write_mvprobs(VP8_COMP *cpi) ); if (flags[0] || flags[1]) - vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flags); + vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flags); #ifdef ENTROPY_STATS active_section = 5; diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h index e4481bff0..a6116c133 100644 --- a/vp8/encoder/encodemv.h +++ b/vp8/encoder/encodemv.h @@ -16,6 +16,6 @@ void vp8_write_mvprobs(VP8_COMP *); void vp8_encode_motion_vector(vp8_writer *, const MV *, const MV_CONTEXT *); -void vp8_build_component_cost_table(int *mvcost[2], int *mvsadcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]); +void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]); #endif diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index d73542226..8aef915b8 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -319,8 +319,8 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); z->mvcost[0] = &z->mvcosts[0][mv_max+1]; z->mvcost[1] = &z->mvcosts[1][mv_max+1]; - z->mvsadcost[0] = &z->mvsadcosts[0][mv_max+1]; - z->mvsadcost[1] = &z->mvsadcosts[1][mv_max+1]; + z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1]; + z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1]; vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs)); diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index 3ebbba4d6..9a7774863 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -446,7 +446,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, M xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; // Initial step/diamond search centred on best mv - tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv); + tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param, x->errorperbit, &num00, &v_fn_ptr, x->mvcost, ref_mv); if ( tmp_err < INT_MAX-new_mv_mode_penalty ) tmp_err += new_mv_mode_penalty; @@ -469,7 +469,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, M num00--; else { - tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvsadcost, x->mvcost, ref_mv); + tmp_err = cpi->diamond_search_sad(x, b, d, ref_mv, &tmp_mv, step_param + n, x->errorperbit, &num00, &v_fn_ptr, x->mvcost, ref_mv); if ( tmp_err < INT_MAX-new_mv_mode_penalty ) tmp_err += new_mv_mode_penalty; @@ -540,7 +540,7 @@ void vp8_first_pass(VP8_COMP *cpi) int flag[2] = {1, 1}; vp8_initialize_rd_consts(cpi, cm->base_qindex+cm->y1dc_delta_q); vpx_memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); - vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag); + vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cm->fc.mvc, flag); } // for each macroblock row in image diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index de6642b75..37c30da14 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -54,6 +54,11 @@ static int mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit) //return (vp8_mv_bit_cost(mv, ref, mvcost, 128) * error_per_bit + 128) >> 8; } +static int mvsad_err_cost(MV *mv, MV *ref, int *mvsadcost[2], int error_per_bit) +{ + /* Calculate sad error cost on full pixel basis. */ + return ((mvsadcost[0][(mv->row - ref->row)] + mvsadcost[1][(mv->col - ref->col)]) * error_per_bit + 128) >> 8; +} static int mv_bits(MV *mv, MV *ref, int *mvcost[2]) { @@ -753,7 +758,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm } -#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) +#define MVC(r,c) (((mvsadcost[0][r-rr] + mvsadcost[1][c-rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector #define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score. #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost @@ -801,8 +806,8 @@ int vp8_hex_search if (br > x->mv_row_max) br = x->mv_row_max; - rr >>= 1; - rc >>= 1; + rr >>= 3; + rc >>= 3; besterr = ERR(br, bc, thiserr); @@ -915,7 +920,6 @@ int vp8_diamond_search_sad int error_per_bit, int *num00, vp8_variance_fn_ptr_t *fn_ptr, - int *mvsadcost[2], int *mvcost[2], MV *center_mv ) @@ -944,8 +948,16 @@ int vp8_diamond_search_sad unsigned char *check_here; int thissad; + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + MV fcenter_mv; + fcenter_mv.row = center_mv->row >> 3; + fcenter_mv.col = center_mv->col >> 3; + *num00 = 0; + best_mv->row = ref_row; + best_mv->col = ref_col; + // Work out the start point for the search in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); best_address = in_what; @@ -955,7 +967,7 @@ int vp8_diamond_search_sad (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) { // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, error_per_bit); } // search_param determines the length of the initial step and hence the number of iterations @@ -964,8 +976,6 @@ int vp8_diamond_search_sad tot_steps = (x->ss_count / x->searches_per_step) - search_param; i = 1; - best_mv->row = ref_row; - best_mv->col = ref_col; for (step = 0; step < tot_steps ; step++) { @@ -984,9 +994,9 @@ int vp8_diamond_search_sad if (thissad < bestsad) { - this_mv.row = this_row_offset << 3; - this_mv.col = this_col_offset << 3; - thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + this_mv.row = this_row_offset; + this_mv.col = this_col_offset; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1031,7 +1041,6 @@ int vp8_diamond_search_sadx4 int error_per_bit, int *num00, vp8_variance_fn_ptr_t *fn_ptr, - int *mvsadcost[2], int *mvcost[2], MV *center_mv ) @@ -1060,7 +1069,14 @@ int vp8_diamond_search_sadx4 unsigned char *check_here; unsigned int thissad; + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + MV fcenter_mv; + fcenter_mv.row = center_mv->row >> 3; + fcenter_mv.col = center_mv->col >> 3; + *num00 = 0; + best_mv->row = ref_row; + best_mv->col = ref_col; // Work out the start point for the search in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); @@ -1071,7 +1087,7 @@ int vp8_diamond_search_sadx4 (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) { // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, error_per_bit); } // search_param determines the length of the initial step and hence the number of iterations @@ -1080,8 +1096,6 @@ int vp8_diamond_search_sadx4 tot_steps = (x->ss_count / x->searches_per_step) - search_param; i = 1; - best_mv->row = ref_row; - best_mv->col = ref_col; for (step = 0; step < tot_steps ; step++) { @@ -1111,9 +1125,9 @@ int vp8_diamond_search_sadx4 { if (sad_array[t] < bestsad) { - this_mv.row = (best_mv->row + ss[i].mv.row) << 3; - this_mv.col = (best_mv->col + ss[i].mv.col) << 3; - sad_array[t] += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + this_mv.row = best_mv->row + ss[i].mv.row; + this_mv.col = best_mv->col + ss[i].mv.col; + sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (sad_array[t] < bestsad) { @@ -1140,9 +1154,9 @@ int vp8_diamond_search_sadx4 if (thissad < bestsad) { - this_mv.row = this_row_offset << 3; - this_mv.col = this_col_offset << 3; - thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + this_mv.row = this_row_offset; + this_mv.col = this_col_offset; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1178,7 +1192,7 @@ int vp8_diamond_search_sadx4 #if !(CONFIG_REALTIME_ONLY) -int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv) +int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; @@ -1202,6 +1216,11 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro int col_min = ref_col - distance; int col_max = ref_col + distance; + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + MV fcenter_mv; + fcenter_mv.row = center_mv->row >> 3; + fcenter_mv.col = center_mv->col >> 3; + // Work out the mid point for the search in_what = *(d->base_pre) + d->pre; bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; @@ -1216,7 +1235,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro // Baseline value at the centre //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14)); - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, error_per_bit); } // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border @@ -1234,17 +1253,17 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro for (r = row_min; r < row_max ; r++) { - this_mv.row = r << 3; + this_mv.row = r; check_here = r * mv_stride + in_what + col_min; for (c = col_min; c < col_max; c++) { thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - this_mv.col = c << 3; + this_mv.col = c; //thissad += (int)sqrt(mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14)); //thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)]; - thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost); + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost); if (thissad < bestsad) { @@ -1268,7 +1287,7 @@ int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int erro return INT_MAX; } -int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv) +int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; @@ -1294,6 +1313,11 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er unsigned int sad_array[3]; + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + MV fcenter_mv; + fcenter_mv.row = center_mv->row >> 3; + fcenter_mv.col = center_mv->col >> 3; + // Work out the mid point for the search in_what = *(d->base_pre) + d->pre; bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; @@ -1306,7 +1330,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) { // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, error_per_bit); } // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border @@ -1324,7 +1348,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er for (r = row_min; r < row_max ; r++) { - this_mv.row = r << 3; + this_mv.row = r; check_here = r * mv_stride + in_what + col_min; c = col_min; @@ -1340,8 +1364,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er if (thissad < bestsad) { - this_mv.col = c << 3; - thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + this_mv.col = c; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1363,8 +1387,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er if (thissad < bestsad) { - this_mv.col = c << 3; - thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + this_mv.col = c; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1391,7 +1415,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er return INT_MAX; } -int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv) +int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; @@ -1418,6 +1442,11 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er DECLARE_ALIGNED_ARRAY(16, unsigned short, sad_array8, 8); unsigned int sad_array[3]; + int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]}; + MV fcenter_mv; + fcenter_mv.row = center_mv->row >> 3; + fcenter_mv.col = center_mv->col >> 3; + // Work out the mid point for the search in_what = *(d->base_pre) + d->pre; bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; @@ -1430,7 +1459,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) { // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mv_err_cost(ref_mv, center_mv, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, error_per_bit); } // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border @@ -1448,7 +1477,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er for (r = row_min; r < row_max ; r++) { - this_mv.row = r << 3; + this_mv.row = r; check_here = r * mv_stride + in_what + col_min; c = col_min; @@ -1464,8 +1493,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er if (thissad < bestsad) { - this_mv.col = c << 3; - thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + this_mv.col = c; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1493,8 +1522,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er if (thissad < bestsad) { - this_mv.col = c << 3; - thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + this_mv.col = c; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { @@ -1516,8 +1545,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er if (thissad < bestsad) { - this_mv.col = c << 3; - thissad += mv_err_cost(&this_mv, center_mv, mvsadcost, error_per_bit); + this_mv.col = c; + thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index 83f95c6e0..5efcec296 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -66,7 +66,6 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step; int distance, \ vp8_variance_fn_ptr_t *fn_ptr, \ int *mvcost[2], \ - int *mvsadcost[2], \ MV *center_mv \ ) @@ -82,7 +81,6 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step; int error_per_bit, \ int *num00, \ vp8_variance_fn_ptr_t *fn_ptr, \ - int *mvsadcost[2], \ int *mvcost[2], \ MV *center_mv \ ) diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 67c6f61d0..87f0f1853 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1863,13 +1863,13 @@ static void cal_mvsadcosts(int *mvsadcost[2]) do { - double z = 256 * (2 * (log2f(2 * i) + .6)); + double z = 256 * (2 * (log2f(8 * i) + .6)); mvsadcost [0][i] = (int) z; mvsadcost [1][i] = (int) z; mvsadcost [0][-i] = (int) z; mvsadcost [1][-i] = (int) z; } - while (++i <= mv_max); + while (++i <= mvfp_max); } VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) @@ -2065,8 +2065,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1]; cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1]; - cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mv_max+1]; - cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mv_max+1]; + cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1]; + cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1]; cal_mvsadcosts(cpi->mb.mvsadcost); diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 0790d3517..0edd806a2 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -738,7 +738,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec } else { - bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb < 9 + bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb < 9 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -757,7 +757,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec num00--; else { - thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb = 9 + thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb = 9 if (thissme < bestsme) { diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 9c3dcdb27..f3bcf9921 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -355,7 +355,7 @@ void vp8_setup_key_frame(VP8_COMP *cpi) vpx_memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); { int flag[2] = {1, 1}; - vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); + vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *) cpi->common.fc.mvc, flag); } vpx_memset(cpi->common.fc.pre_mvc, 0, sizeof(cpi->common.fc.pre_mvc)); //initialize pre_mvc to all zero. diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 6d9e33a69..f125e952a 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -1224,7 +1224,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, { bestsme = cpi->diamond_search_sad(x, c, e, bsi->mvp, &mode_mv[NEW4X4], step_param, - sadpb / 2, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv); + sadpb / 2, &num00, v_fn_ptr, x->mvcost, bsi->ref_mv); n = num00; num00 = 0; @@ -1239,7 +1239,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, { thissme = cpi->diamond_search_sad(x, c, e, bsi->mvp, &temp_mv, step_param + n, - sadpb / 2, &num00, v_fn_ptr, x->mvsadcost, x->mvcost, bsi->ref_mv); + sadpb / 2, &num00, v_fn_ptr, x->mvcost, bsi->ref_mv); if (thissme < bestsme) { @@ -1257,7 +1257,7 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { thissme = cpi->full_search_sad(x, c, e, bsi->mvp, - sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost,bsi->ref_mv); + sadpb / 4, 16, v_fn_ptr, x->mvcost, bsi->ref_mv); if (thissme < bestsme) { @@ -2167,7 +2167,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } else { - bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb < 9 + bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb < 9 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -2186,7 +2186,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int num00--; else { - thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv); //sadpb = 9 + thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb = 9 if (thissme < bestsme) { @@ -2232,7 +2232,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int { int sadpb = x->sadperbit16 >> 2; - thissme = cpi->full_search_sad(x, b, d, &full_mvp, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost,&best_ref_mv); + thissme = cpi->full_search_sad(x, b, d, &full_mvp, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); } // Barrier threshold to initiating full search diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index fd36b22eb..cec951897 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -195,63 +195,14 @@ static int vp8_temporal_filter_find_matching_mb_c further_steps = 0; } - if (1/*cpi->sf.search_method == HEX*/) - { - // TODO Check that the 16x16 vf & sdf are selected here - bestsme = vp8_hex_search(x, b, d, - &best_ref_mv1, &d->bmi.mv.as_mv, - step_param, - sadpb/*x->errorperbit*/, - &num00, &cpi->fn_ptr[BLOCK_16X16], - mvsadcost, mvcost, &best_ref_mv1); - } - else - { - int mv_x, mv_y; - - bestsme = cpi->diamond_search_sad(x, b, d, - &best_ref_mv1, &d->bmi.mv.as_mv, - step_param, - sadpb / 2/*x->errorperbit*/, - &num00, &cpi->fn_ptr[BLOCK_16X16], - mvsadcost, mvcost, &best_ref_mv1); //sadpb < 9 - - // Further step/diamond searches as necessary - n = 0; - //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; - - n = num00; - num00 = 0; - - while (n < further_steps) - { - n++; - - if (num00) - num00--; - else - { - thissme = cpi->diamond_search_sad(x, b, d, - &best_ref_mv1, &d->bmi.mv.as_mv, - step_param + n, - sadpb / 4/*x->errorperbit*/, - &num00, &cpi->fn_ptr[BLOCK_16X16], - mvsadcost, mvcost, &best_ref_mv1); //sadpb = 9 - - if (thissme < bestsme) - { - bestsme = thissme; - mv_y = d->bmi.mv.as_mv.row; - mv_x = d->bmi.mv.as_mv.col; - } - else - { - d->bmi.mv.as_mv.row = mv_y; - d->bmi.mv.as_mv.col = mv_x; - } - } - } - } + /*cpi->sf.search_method == HEX*/ + // TODO Check that the 16x16 vf & sdf are selected here + bestsme = vp8_hex_search(x, b, d, + &best_ref_mv1, &d->bmi.mv.as_mv, + step_param, + sadpb/*x->errorperbit*/, + &num00, &cpi->fn_ptr[BLOCK_16X16], + mvsadcost, mvcost, &best_ref_mv1); #if ALT_REF_SUBPEL_ENABLED // Try sub-pixel MC? diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm index 9a1584024..e00faebd1 100644 --- a/vp8/encoder/x86/quantize_sse2.asm +++ b/vp8/encoder/x86/quantize_sse2.asm @@ -22,35 +22,36 @@ sym(vp8_regular_quantize_b_sse2): mov rbp, rsp SAVE_XMM GET_GOT rbx - push rsi %if ABI_IS_32BIT push rdi + push rsi %else %ifidn __OUTPUT_FORMAT__,x64 push rdi + push rsi %endif %endif ALIGN_STACK 16, rax - %define BLOCKD_d 0 ; 8 - %define zrun_zbin_boost 8 ; 8 - %define abs_minus_zbin 16 ; 32 - %define temp_qcoeff 48 ; 32 - %define qcoeff 80 ; 32 - %define stack_size 112 + %define zrun_zbin_boost 0 ; 8 + %define abs_minus_zbin 8 ; 32 + %define temp_qcoeff 40 ; 32 + %define qcoeff 72 ; 32 + %define stack_size 104 sub rsp, stack_size ; end prolog %if ABI_IS_32BIT - mov rdi, arg(0) + mov rdi, arg(0) ; BLOCK *b + mov rsi, arg(1) ; BLOCKD *d %else %ifidn __OUTPUT_FORMAT__,x64 mov rdi, rcx ; BLOCK *b - mov [rsp + BLOCKD_d], rdx + mov rsi, rdx ; BLOCKD *d %else ;mov rdi, rdi ; BLOCK *b - mov [rsp + BLOCKD_d], rsi + ;mov rsi, rsi ; BLOCKD *d %endif %endif @@ -125,59 +126,52 @@ sym(vp8_regular_quantize_b_sse2): movdqa [rsp + qcoeff], xmm6 movdqa [rsp + qcoeff + 16], xmm6 - mov rsi, [rdi + vp8_block_zrun_zbin_boost] ; zbin_boost_ptr + mov rdx, [rdi + vp8_block_zrun_zbin_boost] ; zbin_boost_ptr mov rax, [rdi + vp8_block_quant_shift] ; quant_shift_ptr - mov [rsp + zrun_zbin_boost], rsi + mov [rsp + zrun_zbin_boost], rdx %macro ZIGZAG_LOOP 1 - movsx edx, WORD PTR[GLOBAL(zig_zag + (%1 * 2))] ; rc - ; x - movsx ecx, WORD PTR[rsp + abs_minus_zbin + rdx *2] + movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2] ; if (x >= zbin) - sub cx, WORD PTR[rsi] ; x - zbin - lea rsi, [rsi + 2] ; zbin_boost_ptr++ + sub cx, WORD PTR[rdx] ; x - zbin + lea rdx, [rdx + 2] ; zbin_boost_ptr++ jl rq_zigzag_loop_%1 ; x < zbin - movsx edi, WORD PTR[rsp + temp_qcoeff + rdx *2] + movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2] - ; downshift by quant_shift[rdx] - movsx ecx, WORD PTR[rax + rdx*2] ; quant_shift_ptr[rc] + ; downshift by quant_shift[rc] + movsx ecx, WORD PTR[rax + %1 * 2] ; quant_shift_ptr[rc] sar edi, cl ; also sets Z bit je rq_zigzag_loop_%1 ; !y - mov WORD PTR[rsp + qcoeff + rdx*2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc] - mov rsi, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost + mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc] + mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost rq_zigzag_loop_%1: %endmacro -ZIGZAG_LOOP 0 -ZIGZAG_LOOP 1 -ZIGZAG_LOOP 2 -ZIGZAG_LOOP 3 -ZIGZAG_LOOP 4 -ZIGZAG_LOOP 5 -ZIGZAG_LOOP 6 -ZIGZAG_LOOP 7 -ZIGZAG_LOOP 8 -ZIGZAG_LOOP 9 -ZIGZAG_LOOP 10 -ZIGZAG_LOOP 11 +; in vp8_default_zig_zag1d order: see vp8/common/entropy.c +ZIGZAG_LOOP 0 +ZIGZAG_LOOP 1 +ZIGZAG_LOOP 4 +ZIGZAG_LOOP 8 +ZIGZAG_LOOP 5 +ZIGZAG_LOOP 2 +ZIGZAG_LOOP 3 +ZIGZAG_LOOP 6 +ZIGZAG_LOOP 9 ZIGZAG_LOOP 12 ZIGZAG_LOOP 13 +ZIGZAG_LOOP 10 +ZIGZAG_LOOP 7 +ZIGZAG_LOOP 11 ZIGZAG_LOOP 14 ZIGZAG_LOOP 15 movdqa xmm2, [rsp + qcoeff] movdqa xmm3, [rsp + qcoeff + 16] -%if ABI_IS_32BIT - mov rdi, arg(1) -%else - mov rdi, [rsp + BLOCKD_d] -%endif - - mov rcx, [rdi + vp8_blockd_dequant] ; dequant_ptr - mov rsi, [rdi + vp8_blockd_dqcoeff] ; dqcoeff_ptr + mov rcx, [rsi + vp8_blockd_dequant] ; dequant_ptr + mov rdi, [rsi + vp8_blockd_dqcoeff] ; dqcoeff_ptr ; y ^ sz pxor xmm2, xmm0 @@ -190,15 +184,15 @@ ZIGZAG_LOOP 15 movdqa xmm0, [rcx] movdqa xmm1, [rcx + 16] - mov rcx, [rdi + vp8_blockd_qcoeff] ; qcoeff_ptr + mov rcx, [rsi + vp8_blockd_qcoeff] ; qcoeff_ptr pmullw xmm0, xmm2 pmullw xmm1, xmm3 movdqa [rcx], xmm2 ; store qcoeff movdqa [rcx + 16], xmm3 - movdqa [rsi], xmm0 ; store dqcoeff - movdqa [rsi + 16], xmm1 + movdqa [rdi], xmm0 ; store dqcoeff + movdqa [rdi + 16], xmm1 ; select the last value (in zig_zag order) for EOB pcmpeqw xmm2, xmm6 @@ -220,19 +214,20 @@ ZIGZAG_LOOP 15 pmaxsw xmm2, xmm3 movd eax, xmm2 and eax, 0xff - mov [rdi + vp8_blockd_eob], eax + mov [rsi + vp8_blockd_eob], eax ; begin epilog add rsp, stack_size pop rsp %if ABI_IS_32BIT + pop rsi pop rdi %else %ifidn __OUTPUT_FORMAT__,x64 + pop rsi pop rdi %endif %endif - pop rsi RESTORE_GOT RESTORE_XMM pop rbp @@ -347,11 +342,6 @@ sym(vp8_fast_quantize_b_impl_sse2): SECTION_RODATA align 16 -zig_zag: - dw 0x0000, 0x0001, 0x0004, 0x0008 - dw 0x0005, 0x0002, 0x0003, 0x0006 - dw 0x0009, 0x000c, 0x000d, 0x000a - dw 0x0007, 0x000b, 0x000e, 0x000f inv_zig_zag: dw 0x0001, 0x0002, 0x0006, 0x0007 dw 0x0003, 0x0005, 0x0008, 0x000d |