diff options
Diffstat (limited to 'vp9/encoder/vp9_rdopt.c')
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 922 |
1 files changed, 513 insertions, 409 deletions
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index aa0557735..862e72f24 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -46,6 +46,12 @@ /* Factor to weigh the rate for switchable interp filters */ #define SWITCHABLE_INTERP_RATE_FACTOR 1 +DECLARE_ALIGNED(16, extern const uint8_t, + vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); + +#define I4X4_PRED 0x8000 +#define SPLITMV 0x10000 + const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {ZEROMV, LAST_FRAME, NONE}, {DC_PRED, INTRA_FRAME, NONE}, @@ -81,7 +87,7 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {SPLITMV, GOLDEN_FRAME, NONE}, {SPLITMV, ALTREF_FRAME, NONE}, - {I4X4_PRED, INTRA_FRAME, NONE}, + {I4X4_PRED, INTRA_FRAME, NONE}, /* compound prediction modes */ {ZEROMV, LAST_FRAME, GOLDEN_FRAME}, @@ -105,11 +111,31 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {SPLITMV, GOLDEN_FRAME, ALTREF_FRAME}, }; +#if CONFIG_BALANCED_COEFTREE +static void fill_token_costs(vp9_coeff_count *c, + vp9_coeff_count *cnoskip, + vp9_coeff_probs_model *p, + TX_SIZE tx_size) { + int i, j, k, l; + for (i = 0; i < BLOCK_TYPES; i++) + for (j = 0; j < REF_TYPES; j++) + for (k = 0; k < COEF_BANDS; k++) + for (l = 0; l < PREV_COEF_CONTEXTS; l++) { + vp9_prob probs[ENTROPY_NODES]; + vp9_model_to_full_probs(p[i][j][k][l], probs); + vp9_cost_tokens((int *)cnoskip[i][j][k][l], probs, + vp9_coef_tree); + // Replace the eob node prob with a very small value so that the + // cost approximately equals the cost without the eob node + probs[1] = 1; + vp9_cost_tokens((int *)c[i][j][k][l], probs, vp9_coef_tree); + } +} +#else static void fill_token_costs(vp9_coeff_count *c, vp9_coeff_probs_model *p, TX_SIZE tx_size) { int i, j, k, l; - for (i = 0; i < BLOCK_TYPES; i++) for (j = 0; j < REF_TYPES; j++) for (k = 0; k < COEF_BANDS; k++) @@ -120,6 +146,7 @@ static void fill_token_costs(vp9_coeff_count *c, vp9_coef_tree); } } +#endif static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -210,6 +237,20 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { } } +#if CONFIG_BALANCED_COEFTREE + fill_token_costs(cpi->mb.token_costs[TX_4X4], + cpi->mb.token_costs_noskip[TX_4X4], + cpi->common.fc.coef_probs_4x4, TX_4X4); + fill_token_costs(cpi->mb.token_costs[TX_8X8], + cpi->mb.token_costs_noskip[TX_8X8], + cpi->common.fc.coef_probs_8x8, TX_8X8); + fill_token_costs(cpi->mb.token_costs[TX_16X16], + cpi->mb.token_costs_noskip[TX_16X16], + cpi->common.fc.coef_probs_16x16, TX_16X16); + fill_token_costs(cpi->mb.token_costs[TX_32X32], + cpi->mb.token_costs_noskip[TX_32X32], + cpi->common.fc.coef_probs_32x32, TX_32X32); +#else fill_token_costs(cpi->mb.token_costs[TX_4X4], cpi->common.fc.coef_probs_4x4, TX_4X4); fill_token_costs(cpi->mb.token_costs[TX_8X8], @@ -218,6 +259,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { cpi->common.fc.coef_probs_16x16, TX_16X16); fill_token_costs(cpi->mb.token_costs[TX_32X32], cpi->common.fc.coef_probs_32x32, TX_32X32); +#endif for (i = 0; i < NUM_PARTITION_CONTEXTS; i++) vp9_cost_tokens(cpi->mb.partition_cost[i], @@ -225,7 +267,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { vp9_partition_tree); /*rough estimate for costing*/ - cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4; vp9_init_mode_costs(cpi); if (cpi->common.frame_type != KEY_FRAME) { @@ -271,7 +312,13 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, TX_TYPE tx_type = DCT_DCT; const int segment_id = xd->mode_info_context->mbmi.segment_id; +#if CONFIG_BALANCED_COEFTREE + unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = + mb->token_costs_noskip[tx_size][type][ref]; +#else vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; +#endif + int seg_eob, default_eob; uint8_t token_cache[1024]; const uint8_t * band_translate; @@ -291,8 +338,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, get_tx_type_4x4(xd, block) : DCT_DCT; above_ec = A[0] != 0; left_ec = L[0] != 0; +#if !CONFIG_BALANCED_COEFTREE vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref], coef_probs); +#endif seg_eob = 16; scan = get_scan_4x4(tx_type); band_translate = vp9_coefband_trans_4x4; @@ -307,8 +356,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, above_ec = (A[0] + A[1]) != 0; left_ec = (L[0] + L[1]) != 0; scan = get_scan_8x8(tx_type); +#if !CONFIG_BALANCED_COEFTREE vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref], coef_probs); +#endif seg_eob = 64; band_translate = vp9_coefband_trans_8x8plus; break; @@ -320,8 +371,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; scan = get_scan_16x16(tx_type); +#if !CONFIG_BALANCED_COEFTREE vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref], coef_probs); +#endif seg_eob = 256; above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3]) != 0; @@ -330,8 +383,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, } case TX_32X32: scan = vp9_default_scan_32x32; +#if !CONFIG_BALANCED_COEFTREE vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref], coef_probs); +#endif seg_eob = 1024; above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0; left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0; @@ -362,18 +417,30 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, if (c) pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); +#if CONFIG_BALANCED_COEFTREE + if (!c || token_cache[scan[c - 1]]) // do not skip eob + cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v]; + else + cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v]; +#else cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v]; - if (!c || token_cache[scan[c - 1]]) cost += vp9_cost_bit(coef_probs[band][pt][0], 1); - token_cache[scan[c]] = t; +#endif + token_cache[scan[c]] = vp9_pt_energy_class[t]; } if (c < seg_eob) { if (c) pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob); +#if CONFIG_BALANCED_COEFTREE + cost += mb->token_costs_noskip[tx_size][type][ref] + [get_coef_band(band_translate, c)] + [pt][DCT_EOB_TOKEN]; +#else cost += mb->token_costs[tx_size][type][ref] [get_coef_band(band_translate, c)] [pt][DCT_EOB_TOKEN]; +#endif } } @@ -556,9 +623,25 @@ static void super_block_yrd(VP9_COMP *cpi, int64_t txfm_cache[NB_TXFM_MODES]) { VP9_COMMON *const cm = &cpi->common; int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB]; + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; vp9_subtract_sby(x, bs); + if (cpi->speed > 4) { + if (bs >= BLOCK_SIZE_SB32X32) { + mbmi->txfm_size = TX_32X32; + } else if (bs >= BLOCK_SIZE_MB16X16) { + mbmi->txfm_size = TX_16X16; + } else if (bs >= BLOCK_SIZE_SB8X8) { + mbmi->txfm_size = TX_8X8; + } else { + mbmi->txfm_size = TX_4X4; + } + super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs, + mbmi->txfm_size); + return; + } if (bs >= BLOCK_SIZE_SB32X32) super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], bs, TX_32X32); @@ -611,11 +694,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, int64_t this_rd; int ratey = 0; - xd->mode_info_context->bmi[ib].as_mode.first = mode; - if (cm->frame_type == KEY_FRAME) - rate = bmode_costs[mode]; - else - rate = x->mbmode_cost[cm->frame_type][mode]; + rate = bmode_costs[mode]; distortion = 0; vpx_memcpy(tempa, ta, sizeof(ta)); @@ -653,9 +732,6 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), 16) >> 2; - vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode, - dst, xd->plane[0].dst.stride); - if (best_tx_type != DCT_DCT) vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16), dst, xd->plane[0].dst.stride, best_tx_type); @@ -726,16 +802,15 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int64_t total_rd = 0; ENTROPY_CONTEXT t_above[4], t_left[4]; int *bmode_costs; + MODE_INFO *const mic = xd->mode_info_context; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); - xd->mode_info_context->mbmi.mode = I4X4_PRED; - bmode_costs = mb->inter_bmode_costs; + bmode_costs = mb->mbmode_cost; for (idy = 0; idy < 2; idy += bh) { for (idx = 0; idx < 2; idx += bw) { - MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry); @@ -747,7 +822,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, const MB_PREDICTION_MODE L = (xd->left_available || idx) ? left_block_mode(mic, i) : DC_PRED; - bmode_costs = mb->bmode_costs[A][L]; + bmode_costs = mb->y_mode_costs[A][L]; } total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs, @@ -774,6 +849,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, *Rate = cost; *rate_y = tot_rate_y; *Distortion = distortion; + xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first; return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } @@ -785,12 +861,13 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t txfm_cache[NB_TXFM_MODES]) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); - MACROBLOCKD *xd = &x->e_mbd; + MACROBLOCKD *const xd = &x->e_mbd; int this_rate, this_rate_tokenonly; int this_distortion, s; int64_t best_rd = INT64_MAX, this_rd; TX_SIZE UNINITIALIZED_IS_SAFE(best_tx); int i; + int *bmode_costs = x->mbmode_cost; if (bsize < BLOCK_SIZE_SB8X8) { x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4; @@ -805,17 +882,19 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t local_txfm_cache[NB_TXFM_MODES]; MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; - const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis); - const MB_PREDICTION_MODE L = xd->left_available ? - left_block_mode(mic, 0) : DC_PRED; - - int *bmode_costs = x->bmode_costs[A][L]; + if (cpi->common.frame_type == KEY_FRAME) { + const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis); + const MB_PREDICTION_MODE L = xd->left_available ? + left_block_mode(mic, 0) : DC_PRED; + bmode_costs = x->y_mode_costs[A][L]; + } x->e_mbd.mode_info_context->mbmi.mode = mode; vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize); super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, bsize, local_txfm_cache); + this_rate = this_rate_tokenonly + bmode_costs[mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); @@ -925,10 +1004,10 @@ int vp9_cost_mv_ref(VP9_COMP *cpi, VP9_COMMON *pc = &cpi->common; vp9_prob p [VP9_MVREFS - 1]; - assert(NEARESTMV <= m && m <= SPLITMV); + assert(NEARESTMV <= m && m <= NEWMV); vp9_mv_ref_probs(pc, p, mode_context); - return cost_token(vp9_mv_ref_tree, p, - vp9_mv_ref_encoding_array - NEARESTMV + m); + return cost_token(vp9_sb_mv_ref_tree, p, + vp9_sb_mv_ref_encoding_array - NEARESTMV + m); } else return 0; } @@ -938,19 +1017,18 @@ void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; } -static int labels2mode(MACROBLOCK *x, - int const *labelings, int which_label, +static int labels2mode(MACROBLOCK *x, int i, MB_PREDICTION_MODE this_mode, int_mv *this_mv, int_mv *this_second_mv, int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], - int_mv seg_mvs[MAX_REF_FRAMES - 1], + int_mv seg_mvs[MAX_REF_FRAMES], int_mv *best_ref_mv, int_mv *second_best_ref_mv, int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mode_info_context; MB_MODE_INFO * mbmi = &mic->mbmi; - int i, cost = 0, thismvcost = 0; + int cost = 0, thismvcost = 0; int idx, idy; int bw = 1 << b_width_log2(mbmi->sb_type); int bh = 1 << b_height_log2(mbmi->sb_type); @@ -958,72 +1036,61 @@ static int labels2mode(MACROBLOCK *x, /* We have to be careful retrieving previously-encoded motion vectors. Ones from this macroblock have to be pulled from the BLOCKD array as they have not yet made it to the bmi array in our MB_MODE_INFO. */ - for (i = 0; i < 4; ++i) { - MB_PREDICTION_MODE m; - - if (labelings[i] != which_label) - continue; + MB_PREDICTION_MODE m; - { - // the only time we should do costing for new motion vector or mode - // is when we are on a new label (jbb May 08, 2007) - switch (m = this_mode) { - case NEWMV: - if (mbmi->second_ref_frame > 0) { - this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int; - this_second_mv->as_int = - seg_mvs[mbmi->second_ref_frame - 1].as_int; - } - - thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, - 102, xd->allow_high_precision_mv); - if (mbmi->second_ref_frame > 0) { - thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, - mvjcost, mvcost, 102, - xd->allow_high_precision_mv); - } - break; - case NEARESTMV: - this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame].as_int; - if (mbmi->second_ref_frame > 0) - this_second_mv->as_int = - frame_mv[NEARESTMV][mbmi->second_ref_frame].as_int; - break; - case NEARMV: - this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame].as_int; - if (mbmi->second_ref_frame > 0) - this_second_mv->as_int = - frame_mv[NEARMV][mbmi->second_ref_frame].as_int; - break; - case ZEROMV: - this_mv->as_int = 0; - if (mbmi->second_ref_frame > 0) - this_second_mv->as_int = 0; - break; - default: - break; + // the only time we should do costing for new motion vector or mode + // is when we are on a new label (jbb May 08, 2007) + switch (m = this_mode) { + case NEWMV: + this_mv->as_int = seg_mvs[mbmi->ref_frame].as_int; + thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, + 102, xd->allow_high_precision_mv); + if (mbmi->second_ref_frame > 0) { + this_second_mv->as_int = seg_mvs[mbmi->second_ref_frame].as_int; + thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, + mvjcost, mvcost, 102, + xd->allow_high_precision_mv); } + break; + case NEARESTMV: + this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame].as_int; + if (mbmi->second_ref_frame > 0) + this_second_mv->as_int = + frame_mv[NEARESTMV][mbmi->second_ref_frame].as_int; + break; + case NEARMV: + this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame].as_int; + if (mbmi->second_ref_frame > 0) + this_second_mv->as_int = + frame_mv[NEARMV][mbmi->second_ref_frame].as_int; + break; + case ZEROMV: + this_mv->as_int = 0; + if (mbmi->second_ref_frame > 0) + this_second_mv->as_int = 0; + break; + default: + break; + } - cost = vp9_cost_mv_ref(cpi, this_mode, - mbmi->mb_mode_context[mbmi->ref_frame]); - } + cost = vp9_cost_mv_ref(cpi, this_mode, + mbmi->mb_mode_context[mbmi->ref_frame]); - mic->bmi[i].as_mv[0].as_int = this_mv->as_int; - if (mbmi->second_ref_frame > 0) - mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; + mic->bmi[i].as_mv[0].as_int = this_mv->as_int; + if (mbmi->second_ref_frame > 0) + mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int; - x->partition_info->bmi[i].mode = m; - x->partition_info->bmi[i].mv.as_int = this_mv->as_int; - if (mbmi->second_ref_frame > 0) - x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; - for (idy = 0; idy < bh; ++idy) { - for (idx = 0; idx < bw; ++idx) { - vpx_memcpy(&mic->bmi[i + idy * 2 + idx], - &mic->bmi[i], sizeof(mic->bmi[i])); - vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx], - &x->partition_info->bmi[i], - sizeof(x->partition_info->bmi[i])); - } + x->partition_info->bmi[i].mode = m; + x->partition_info->bmi[i].mv.as_int = this_mv->as_int; + if (mbmi->second_ref_frame > 0) + x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int; + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + vpx_memcpy(&mic->bmi[i + idy * 2 + idx], + &mic->bmi[i], sizeof(mic->bmi[i])); + vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx], + &x->partition_info->bmi[i], + sizeof(x->partition_info->bmi[i])); } } @@ -1033,90 +1100,86 @@ static int labels2mode(MACROBLOCK *x, static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, MACROBLOCK *x, - int const *labels, - int which_label, + int i, int *labelyrate, int *distortion, ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl) { - int i, k; + int k; MACROBLOCKD *xd = &x->e_mbd; BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type; int bwl = b_width_log2(bsize), bw = 1 << bwl; int bhl = b_height_log2(bsize), bh = 1 << bhl; int idx, idy; + const int src_stride = x->plane[0].src.stride; + uint8_t* const src = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src.buf, src_stride); + int16_t* src_diff = + raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src_diff); + int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); + uint8_t* const pre = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].pre[0].buf, + xd->plane[0].pre[0].stride); + uint8_t* const dst = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); + int thisdistortion = 0; + int thisrate = 0; *labelyrate = 0; *distortion = 0; - for (i = 0; i < 4; i++) { - if (labels[i] == which_label) { - const int src_stride = x->plane[0].src.stride; - uint8_t* const src = - raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - x->plane[0].src.buf, src_stride); - int16_t* src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, - x->plane[0].src_diff); - int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); - uint8_t* const pre = - raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].pre[0].buf, - xd->plane[0].pre[0].stride); - uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); - int thisdistortion = 0; - int thisrate = 0; - - vp9_build_inter_predictor(pre, - xd->plane[0].pre[0].stride, - dst, - xd->plane[0].dst.stride, - &xd->mode_info_context->bmi[i].as_mv[0], - &xd->scale_factor[0], - 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix); - - // TODO(debargha): Make this work properly with the - // implicit-compoundinter-weight experiment when implicit - // weighting for splitmv modes is turned on. - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - uint8_t* const second_pre = - raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, - xd->plane[0].pre[1].buf, - xd->plane[0].pre[1].stride); - vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, - dst, xd->plane[0].dst.stride, - &xd->mode_info_context->bmi[i].as_mv[1], - &xd->scale_factor[1], 4 * bw, 4 * bh, 1, - &xd->subpix); - } - vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8, - src, src_stride, - dst, xd->plane[0].dst.stride); + vp9_build_inter_predictor(pre, + xd->plane[0].pre[0].stride, + dst, + xd->plane[0].dst.stride, + &xd->mode_info_context->bmi[i].as_mv[0], + &xd->scale_factor[0], + 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix); + + // TODO(debargha): Make this work properly with the + // implicit-compoundinter-weight experiment when implicit + // weighting for splitmv modes is turned on. + if (xd->mode_info_context->mbmi.second_ref_frame > 0) { + uint8_t* const second_pre = + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].pre[1].buf, + xd->plane[0].pre[1].stride); + vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, + dst, xd->plane[0].dst.stride, + &xd->mode_info_context->bmi[i].as_mv[1], + &xd->scale_factor[1], 4 * bw, 4 * bh, 1, + &xd->subpix); + } - k = i; - for (idy = 0; idy < bh; ++idy) { - for (idx = 0; idx < bw; ++idx) { - k += (idy * 2 + idx); - src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k, - x->plane[0].src_diff); - coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k); - x->fwd_txm4x4(src_diff, coeff, 16); - x->quantize_b_4x4(x, k, DCT_DCT, 16); - thisdistortion += vp9_block_error(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, - k, 16), 16); - thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC, - ta + (k & 1), - tl + (k >> 1), TX_4X4, 16); - } - } - *distortion += thisdistortion; - *labelyrate += thisrate; + vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8, + src, src_stride, + dst, xd->plane[0].dst.stride); + + k = i; + for (idy = 0; idy < bh; ++idy) { + for (idx = 0; idx < bw; ++idx) { + k += (idy * 2 + idx); + src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k, + x->plane[0].src_diff); + coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k); + x->fwd_txm4x4(src_diff, coeff, 16); + x->quantize_b_4x4(x, k, DCT_DCT, 16); + thisdistortion += vp9_block_error(coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, + k, 16), 16); + thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC, + ta + (k & 1), + tl + (k >> 1), TX_4X4, 16); } } + *distortion += thisdistortion; + *labelyrate += thisrate; + *distortion >>= 2; return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); } @@ -1188,11 +1251,45 @@ static enum BlockSize get_block_size(int bw, int bh) { return -1; } +static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { + MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; + x->plane[0].src.buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src.buf, + x->plane[0].src.stride); + assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); + x->e_mbd.plane[0].pre[0].buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, + x->e_mbd.plane[0].pre[0].buf, + x->e_mbd.plane[0].pre[0].stride); + if (mbmi->second_ref_frame) + x->e_mbd.plane[0].pre[1].buf = + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i, + x->e_mbd.plane[0].pre[1].buf, + x->e_mbd.plane[0].pre[1].stride); +} + +static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, + struct buf_2d orig_pre[2]) { + MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi; + x->plane[0].src = orig_src; + x->e_mbd.plane[0].pre[0] = orig_pre[0]; + if (mbmi->second_ref_frame) + x->e_mbd.plane[0].pre[1] = orig_pre[1]; +} + +static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize, + int_mv *frame_mv, + YV12_BUFFER_CONFIG **scaled_ref_frame, + int mi_row, int mi_col, + int_mv single_newmv[MAX_REF_FRAMES]); + static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, - int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { + int_mv seg_mvs[4][MAX_REF_FRAMES], + int mi_row, int mi_col) { int i, j; - static const int labels[4] = { 0, 1, 2, 3 }; int br = 0, bd = 0; MB_PREDICTION_MODE this_mode; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; @@ -1208,7 +1305,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, int bhl = b_height_log2(bsize), bh = 1 << bhl; int idx, idy; vp9_variance_fn_ptr_t *v_fn_ptr; - + YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL}; ENTROPY_CONTEXT t_above[4], t_left[4]; ENTROPY_CONTEXT t_above_b[4], t_left_b[4]; @@ -1255,18 +1352,21 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, int distortion; int labelyrate; ENTROPY_CONTEXT t_above_s[4], t_left_s[4]; + const struct buf_2d orig_src = x->plane[0].src; + struct buf_2d orig_pre[2]; + + vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre)); vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); // motion search for newmv (single predictor case only) if (mbmi->second_ref_frame <= 0 && this_mode == NEWMV) { - int sseshift, n; int step_param = 0; int further_steps; int thissme, bestsme = INT_MAX; - const struct buf_2d orig_src = x->plane[0].src; - const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0]; + int sadpb = x->sadperbit4; + int_mv mvp_full; /* Is the best so far sufficiently good that we cant justify doing * and new motion search. */ @@ -1287,55 +1387,35 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - { - int sadpb = x->sadperbit4; - int_mv mvp_full; - - mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; - mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; - - // find first label - n = i; - - // adjust src pointer for this segment - x->plane[0].src.buf = - raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, - x->plane[0].src.buf, - x->plane[0].src.stride); - assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); - x->e_mbd.plane[0].pre[0].buf = - raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, - x->e_mbd.plane[0].pre[0].buf, - x->e_mbd.plane[0].pre[0].stride); - - bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, - sadpb, further_steps, 0, v_fn_ptr, - bsi->ref_mv, &mode_mv[NEWMV]); - - sseshift = 0; - - // Should we do a full search (best quality only) - if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { - /* Check if mvp_full is within the range. */ - clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, - x->mv_row_min, x->mv_row_max); - - thissme = cpi->full_search_sad(x, &mvp_full, - sadpb, 16, v_fn_ptr, - x->nmvjointcost, x->mvcost, - bsi->ref_mv, - n); - - if (thissme < bestsme) { - bestsme = thissme; - mode_mv[NEWMV].as_int = - x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int; - } else { - /* The full search result is actually worse so re-instate the - * previous best vector */ - x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int = - mode_mv[NEWMV].as_int; - } + mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; + mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; + + // adjust src pointer for this block + mi_buf_shift(x, i); + bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, + sadpb, further_steps, 0, v_fn_ptr, + bsi->ref_mv, &mode_mv[NEWMV]); + + // Should we do a full search (best quality only) + if (cpi->compressor_speed == 0) { + /* Check if mvp_full is within the range. */ + clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, + x->mv_row_min, x->mv_row_max); + + thissme = cpi->full_search_sad(x, &mvp_full, + sadpb, 16, v_fn_ptr, + x->nmvjointcost, x->mvcost, + bsi->ref_mv, i); + + if (thissme < bestsme) { + bestsme = thissme; + mode_mv[NEWMV].as_int = + x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int; + } else { + /* The full search result is actually worse so re-instate the + * previous best vector */ + x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = + mode_mv[NEWMV].as_int; } } @@ -1348,23 +1428,32 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, &distortion, &sse); // safe motion search result for use in compound prediction - seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEWMV].as_int; + seg_mvs[i][mbmi->ref_frame].as_int = mode_mv[NEWMV].as_int; } // restore src pointers - x->plane[0].src = orig_src; - x->e_mbd.plane[0].pre[0] = orig_pre; + mi_buf_restore(x, orig_src, orig_pre); } else if (mbmi->second_ref_frame > 0 && this_mode == NEWMV) { - /* NEW4X4 */ - /* motion search not completed? Then skip newmv for this block with - * comppred */ - if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV || - seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) { + if (seg_mvs[i][mbmi->second_ref_frame].as_int == INVALID_MV || + seg_mvs[i][mbmi->ref_frame ].as_int == INVALID_MV) continue; + + // adjust src pointers + mi_buf_shift(x, i); + if (cpi->sf.comp_inter_joint_search) { + iterative_motion_search(cpi, x, bsize, frame_mv[this_mode], + scaled_ref_frame, + mi_row, mi_col, seg_mvs[i]); + seg_mvs[i][mbmi->ref_frame].as_int = + frame_mv[this_mode][mbmi->ref_frame].as_int; + seg_mvs[i][mbmi->second_ref_frame].as_int = + frame_mv[this_mode][mbmi->second_ref_frame].as_int; } + // restore src pointers + mi_buf_restore(x, orig_src, orig_pre); } - rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], + rate = labels2mode(x, i, this_mode, &mode_mv[this_mode], &second_mode_mv[this_mode], frame_mv, seg_mvs[i], bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, x->mvcost, cpi); @@ -1381,7 +1470,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, continue; this_rd = encode_inter_mb_segment(&cpi->common, - x, labels, i, &labelyrate, + x, i, &labelyrate, &distortion, t_above_s, t_left_s); this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); rate += labelyrate; @@ -1392,10 +1481,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, bestlabelyrate = labelyrate; mode_selected = this_mode; best_label_rd = this_rd; - for (j = 0; j < 4; j++) - if (labels[j] == i) - best_eobs[j] = x->e_mbd.plane[0].eobs[j]; - + best_eobs[i] = x->e_mbd.plane[0].eobs[i]; vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s)); vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s)); } @@ -1404,7 +1490,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, vpx_memcpy(t_above, t_above_b, sizeof(t_above)); vpx_memcpy(t_left, t_left_b, sizeof(t_left)); - labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], + labels2mode(x, i, mode_selected, &mode_mv[mode_selected], &second_mode_mv[mode_selected], frame_mv, seg_mvs[i], bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost, x->mvcost, cpi); @@ -1443,12 +1529,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, } } -static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x, - BEST_SEG_INFO *bsi, - int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { - rd_check_segment_txsize(cpi, x, bsi, seg_mvs); -} - static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, int_mv *second_best_ref_mv, @@ -1457,7 +1537,8 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, int *returnyrate, int *returndistortion, int *skippable, int mvthresh, - int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { + int_mv seg_mvs[4][MAX_REF_FRAMES], + int mi_row, int mi_col) { int i; BEST_SEG_INFO bsi; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; @@ -1473,7 +1554,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < 4; i++) bsi.modes[i] = ZEROMV; - rd_check_segment(cpi, x, &bsi, seg_mvs); + rd_check_segment_txsize(cpi, x, &bsi, seg_mvs, mi_row, mi_col); /* set it to the best */ for (i = 0; i < 4; i++) { @@ -1504,6 +1585,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, *returndistortion = bsi.d; *returnyrate = bsi.segment_yrate; *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8); + mbmi->mode = bsi.modes[3]; return (int)(bsi.segment_rd); } @@ -1878,6 +1960,154 @@ static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) { return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; } +static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize, + int_mv *frame_mv, + YV12_BUFFER_CONFIG **scaled_ref_frame, + int mi_row, int mi_col, + int_mv single_newmv[MAX_REF_FRAMES]) { + int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize); + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; + int refs[2] = { mbmi->ref_frame, + (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) }; + int_mv ref_mv[2]; + const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]); + int ite; + // Prediction buffer from second frame. + uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); + + // Do joint motion search in compound mode to get more accurate mv. + struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}}; + struct buf_2d scaled_first_yv12; + int last_besterr[2] = {INT_MAX, INT_MAX}; + + ref_mv[0] = mbmi->ref_mvs[refs[0]][0]; + ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; + + if (scaled_ref_frame[0]) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. + for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[i] = xd->plane[i].pre[0]; + setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col, + NULL, NULL); + } + + if (scaled_ref_frame[1]) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + backup_second_yv12[i] = xd->plane[i].pre[1]; + + setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col, + NULL, NULL); + } + + xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0], + mi_row, mi_col); + xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1], + mi_row, mi_col); + scaled_first_yv12 = xd->plane[0].pre[0]; + + // Initialize mv using single prediction mode result. + frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; + frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; + + // Allow joint search multiple times iteratively for each ref frame + // and break out the search loop if it couldn't find better mv. + for (ite = 0; ite < 4; ite++) { + struct buf_2d ref_yv12[2]; + int bestsme = INT_MAX; + int sadpb = x->sadperbit16; + int_mv tmp_mv; + int search_range = 3; + + int tmp_col_min = x->mv_col_min; + int tmp_col_max = x->mv_col_max; + int tmp_row_min = x->mv_row_min; + int tmp_row_max = x->mv_row_max; + int id = ite % 2; + + // Initialized here because of compiler problem in Visual Studio. + ref_yv12[0] = xd->plane[0].pre[0]; + ref_yv12[1] = xd->plane[0].pre[1]; + + // Get pred block from second frame. + vp9_build_inter_predictor(ref_yv12[!id].buf, + ref_yv12[!id].stride, + second_pred, pw, + &frame_mv[refs[!id]], + &xd->scale_factor[!id], + pw, ph, 0, + &xd->subpix); + + // Compound motion search on first ref frame. + if (id) + xd->plane[0].pre[0] = ref_yv12[id]; + vp9_clamp_mv_min_max(x, &ref_mv[id]); + + // Use mv result from single mode as mvp. + tmp_mv.as_int = frame_mv[refs[id]].as_int; + + tmp_mv.as_mv.col >>= 3; + tmp_mv.as_mv.row >>= 3; + + // Small-range full-pixel motion search + bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, + search_range, + &cpi->fn_ptr[block_size], + x->nmvjointcost, x->mvcost, + &ref_mv[id], second_pred, + pw, ph); + + x->mv_col_min = tmp_col_min; + x->mv_col_max = tmp_col_max; + x->mv_row_min = tmp_row_min; + x->mv_row_max = tmp_row_max; + + if (bestsme < INT_MAX) { + int dis; /* TODO: use dis in distortion calculation later. */ + unsigned int sse; + + bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv, + &ref_mv[id], + x->errorperbit, + &cpi->fn_ptr[block_size], + x->nmvjointcost, x->mvcost, + &dis, &sse, second_pred, + pw, ph); + } + + if (id) + xd->plane[0].pre[0] = scaled_first_yv12; + + if (bestsme < last_besterr[id]) { + frame_mv[refs[id]].as_int = tmp_mv.as_int; + last_besterr[id] = bestsme; + } else { + break; + } + } + + // restore the predictor + if (scaled_ref_frame[0]) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[0] = backup_yv12[i]; + } + + if (scaled_ref_frame[1]) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[1] = backup_second_yv12[i]; + } + + vpx_free(second_pred); +} + static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int64_t txfm_cache[], @@ -1920,145 +2150,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, ref_mv[1] = mbmi->ref_mvs[refs[1]][0]; if (is_comp_pred) { - if (cpi->sf.comp_inter_joint_serach) { - int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize); - int ite; - // Prediction buffer from second frame. - uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t)); - - // Do joint motion search in compound mode to get more accurate mv. - struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; - struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}}; - struct buf_2d scaled_first_yv12; - int last_besterr[2] = {INT_MAX, INT_MAX}; - - if (scaled_ref_frame[0]) { - int i; - - // Swap out the reference frame for a version that's been scaled to - // match the resolution of the current frame, allowing the existing - // motion search code to be used without additional modifications. - for (i = 0; i < MAX_MB_PLANE; i++) - backup_yv12[i] = xd->plane[i].pre[0]; - - setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col, - NULL, NULL); - } - - if (scaled_ref_frame[1]) { - int i; - - for (i = 0; i < MAX_MB_PLANE; i++) - backup_second_yv12[i] = xd->plane[i].pre[1]; - - setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col, - NULL, NULL); - } - xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0], - mi_row, mi_col); - xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1], - mi_row, mi_col); - - scaled_first_yv12 = xd->plane[0].pre[0]; - - // Initialize mv using single prediction mode result. - frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; - frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; - - // Allow joint search multiple times iteratively for each ref frame - // and break out the search loop if it couldn't find better mv. - for (ite = 0; ite < 4; ite++) { - struct buf_2d ref_yv12[2]; - int bestsme = INT_MAX; - int sadpb = x->sadperbit16; - int_mv tmp_mv; - int search_range = 3; - - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; - int id = ite % 2; - - // Initialized here because of compiler problem in Visual Studio. - ref_yv12[0] = xd->plane[0].pre[0]; - ref_yv12[1] = xd->plane[0].pre[1]; - - // Get pred block from second frame. - vp9_build_inter_predictor(ref_yv12[!id].buf, - ref_yv12[!id].stride, - second_pred, pw, - &frame_mv[refs[!id]], - &xd->scale_factor[!id], - pw, ph, 0, - &xd->subpix); - - // Compound motion search on first ref frame. - if (id) - xd->plane[0].pre[0] = ref_yv12[id]; - vp9_clamp_mv_min_max(x, &ref_mv[id]); - - // Use mv result from single mode as mvp. - tmp_mv.as_int = frame_mv[refs[id]].as_int; - - tmp_mv.as_mv.col >>= 3; - tmp_mv.as_mv.row >>= 3; - - // Small-range full-pixel motion search - bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, - search_range, - &cpi->fn_ptr[block_size], - x->nmvjointcost, x->mvcost, - &ref_mv[id], second_pred, - pw, ph); - - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; - - if (bestsme < INT_MAX) { - int dis; /* TODO: use dis in distortion calculation later. */ - unsigned int sse; - - bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv, - &ref_mv[id], - x->errorperbit, - &cpi->fn_ptr[block_size], - x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, - pw, ph); - } - - if (id) - xd->plane[0].pre[0] = scaled_first_yv12; - - if (bestsme < last_besterr[id]) { - frame_mv[refs[id]].as_int = - xd->mode_info_context->bmi[0].as_mv[1].as_int = tmp_mv.as_int; - last_besterr[id] = bestsme; - } else { - break; - } - } - - // restore the predictor - if (scaled_ref_frame[0]) { - int i; - - for (i = 0; i < MAX_MB_PLANE; i++) - xd->plane[i].pre[0] = backup_yv12[i]; - } + // Initialize mv using single prediction mode result. + frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; + frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; - if (scaled_ref_frame[1]) { - int i; - - for (i = 0; i < MAX_MB_PLANE; i++) - xd->plane[i].pre[1] = backup_second_yv12[i]; - } - - vpx_free(second_pred); - } + if (cpi->sf.comp_inter_joint_search) + iterative_motion_search(cpi, x, bsize, frame_mv, scaled_ref_frame, + mi_row, mi_col, single_newmv); if (frame_mv[refs[0]].as_int == INVALID_MV || frame_mv[refs[1]].as_int == INVALID_MV) @@ -2134,8 +2232,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, x->nmvjointcost, x->mvcost, &dis, &sse); } - frame_mv[refs[0]].as_int = - xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int; + frame_mv[refs[0]].as_int = tmp_mv.as_int; single_newmv[refs[0]].as_int = tmp_mv.as_int; // Add the new motion vector cost to our rolling cost variable @@ -2191,7 +2288,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (mbmi->mv[1].as_mv.col & 15) == 0; // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used - if (1) { + if (cpi->speed > 4) { + *best_filter = EIGHTTAP; + } else { int i, newbest; int tmp_rate_sum = 0, tmp_dist_sum = 0; for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { @@ -2328,6 +2427,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Y cost and distortion super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, bsize, txfm_cache); + *rate2 += *rate_y; *distortion += *distortion_y; @@ -2393,16 +2493,14 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returnrate = rate4x4_y + rate_uv + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); *returndist = dist4x4_y + (dist_uv >> 2); - for (i = 0; i < NB_TXFM_MODES; i++) { - ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]); - } + vpx_memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff)); xd->mode_info_context->mbmi.txfm_size = TX_4X4; } else { *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); *returndist = dist_y + (dist_uv >> 2); for (i = 0; i < NB_TXFM_MODES; i++) { - ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]); + ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->txfm_mode]; } xd->mode_info_context->mbmi.txfm_size = txfm_size; xd->mode_info_context->mbmi.mode = mode; @@ -2457,14 +2555,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t frame_distortions[MAX_REF_FRAMES] = {-1}; int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex, cpi->common.y_dc_delta_q); - int_mv seg_mvs[4][MAX_REF_FRAMES - 1]; + int_mv seg_mvs[4][MAX_REF_FRAMES]; union b_mode_info best_bmodes[4]; PARTITION_INFO best_partition; for (i = 0; i < 4; i++) { int j; - for (j = 0; j < MAX_REF_FRAMES - 1; j++) + for (j = 0; j < MAX_REF_FRAMES; j++) seg_mvs[i][j].as_int = INVALID_MV; } // Everywhere the flag is set the error is much higher than its neighbors. @@ -2563,11 +2661,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame; - if (!(ref_frame == INTRA_FRAME - || (cpi->ref_frame_flags & flag_list[ref_frame]))) { - continue; - } - if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) { if (!(ref_frame_mask & (1 << ref_frame))) { continue; @@ -2585,6 +2678,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame = ref_frame; mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; + if (!(ref_frame == INTRA_FRAME + || (cpi->ref_frame_flags & flag_list[ref_frame]))) { + continue; + } + if (!(mbmi->second_ref_frame == NONE + || (cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))) { + continue; + } + // TODO(jingning, jkoleszar): scaling reference frame not supported for // SPLITMV. if (mbmi->ref_frame > 0 && @@ -2680,8 +2782,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (this_mode == I4X4_PRED) { int rate; - // Note the rate value returned here includes the cost of coding - // the I4X4_PRED mode : x->mbmode_cost[xd->frame_type][I4X4_PRED]; mbmi->txfm_size = TX_4X4; rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, &distortion_y, INT64_MAX); @@ -2716,7 +2816,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, skippable = skippable && skip_uv[uv_tx]; mbmi->uv_mode = mode_uv[uv_tx]; - rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv; + rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv; if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; @@ -2755,7 +2855,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, second_ref, INT64_MAX, &rate, &rate_y, &distortion, &skippable, - (int)this_rd_thresh, seg_mvs); + (int)this_rd_thresh, seg_mvs, + mi_row, mi_col); if (cpi->common.mcomp_filter_type == SWITCHABLE) { const int rs = get_switchable_rate(cm, x); tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0); @@ -2794,7 +2895,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, second_ref, INT64_MAX, &rate, &rate_y, &distortion, &skippable, - (int)this_rd_thresh, seg_mvs); + (int)this_rd_thresh, seg_mvs, + mi_row, mi_col); } else { if (cpi->common.mcomp_filter_type == SWITCHABLE) { int rs = get_switchable_rate(cm, x); @@ -2843,7 +2945,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, compmode_cost = vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred); - mbmi->mode = this_mode; } else { YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL}; int fb = get_ref_frame_idx(cpi, mbmi->ref_frame); @@ -2938,14 +3039,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_mode = this_mode; } - // Store the respective mode distortions for later use. - if (mode_distortions[this_mode] == -1 - || distortion2 < mode_distortions[this_mode]) { - mode_distortions[this_mode] = distortion2; - } - if (frame_distortions[mbmi->ref_frame] == -1 - || distortion2 < frame_distortions[mbmi->ref_frame]) { - frame_distortions[mbmi->ref_frame] = distortion2; + if (this_mode != I4X4_PRED && this_mode != SPLITMV) { + // Store the respective mode distortions for later use. + if (mode_distortions[this_mode] == -1 + || distortion2 < mode_distortions[this_mode]) { + mode_distortions[this_mode] = distortion2; + } + if (frame_distortions[mbmi->ref_frame] == -1 + || distortion2 < frame_distortions[mbmi->ref_frame]) { + frame_distortions[mbmi->ref_frame] = distortion2; + } } // Did this mode help.. i.e. is it the new best mode @@ -2954,7 +3057,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Note index of best mode so far best_mode_index = mode_index; - if (this_mode <= I4X4_PRED) { + if (ref_frame == INTRA_FRAME) { /* required for left and above block mv */ mbmi->mv[0].as_int = 0; } @@ -3052,8 +3155,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Flag all modes that have a distortion thats > 2x the best we found at // this level. for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) { - if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV - || mode_index == SPLITMV) + if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV) continue; if (mode_distortions[mode_index] > 2 * *returndistortion) { @@ -3077,7 +3179,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, assert((cm->mcomp_filter_type == SWITCHABLE) || (cm->mcomp_filter_type == best_mbmode.interp_filter) || - (best_mbmode.mode <= I4X4_PRED)); + (best_mbmode.ref_frame == INTRA_FRAME)); // Accumulate filter usage stats // TODO(agrange): Use RD criteria to select interpolation filter mode. @@ -3129,13 +3231,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // macroblock modes vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); - if (best_mbmode.mode == I4X4_PRED) { + if (best_mbmode.ref_frame == INTRA_FRAME && + best_mbmode.sb_type < BLOCK_SIZE_SB8X8) { for (i = 0; i < 4; i++) { xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode; } } - if (best_mbmode.mode == SPLITMV) { + if (best_mbmode.ref_frame != INTRA_FRAME && + best_mbmode.sb_type < BLOCK_SIZE_SB8X8) { for (i = 0; i < 4; i++) xd->mode_info_context->bmi[i].as_mv[0].as_int = best_bmodes[i].as_mv[0].as_int; |