summaryrefslogtreecommitdiff
path: root/vp9/encoder/vp9_rdopt.c
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder/vp9_rdopt.c')
-rw-r--r--vp9/encoder/vp9_rdopt.c922
1 files changed, 513 insertions, 409 deletions
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index aa0557735..862e72f24 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -46,6 +46,12 @@
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1
+DECLARE_ALIGNED(16, extern const uint8_t,
+ vp9_pt_energy_class[MAX_ENTROPY_TOKENS]);
+
+#define I4X4_PRED 0x8000
+#define SPLITMV 0x10000
+
const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{ZEROMV, LAST_FRAME, NONE},
{DC_PRED, INTRA_FRAME, NONE},
@@ -81,7 +87,7 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{SPLITMV, GOLDEN_FRAME, NONE},
{SPLITMV, ALTREF_FRAME, NONE},
- {I4X4_PRED, INTRA_FRAME, NONE},
+ {I4X4_PRED, INTRA_FRAME, NONE},
/* compound prediction modes */
{ZEROMV, LAST_FRAME, GOLDEN_FRAME},
@@ -105,11 +111,31 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{SPLITMV, GOLDEN_FRAME, ALTREF_FRAME},
};
+#if CONFIG_BALANCED_COEFTREE
+static void fill_token_costs(vp9_coeff_count *c,
+ vp9_coeff_count *cnoskip,
+ vp9_coeff_probs_model *p,
+ TX_SIZE tx_size) {
+ int i, j, k, l;
+ for (i = 0; i < BLOCK_TYPES; i++)
+ for (j = 0; j < REF_TYPES; j++)
+ for (k = 0; k < COEF_BANDS; k++)
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
+ vp9_prob probs[ENTROPY_NODES];
+ vp9_model_to_full_probs(p[i][j][k][l], probs);
+ vp9_cost_tokens((int *)cnoskip[i][j][k][l], probs,
+ vp9_coef_tree);
+ // Replace the eob node prob with a very small value so that the
+ // cost approximately equals the cost without the eob node
+ probs[1] = 1;
+ vp9_cost_tokens((int *)c[i][j][k][l], probs, vp9_coef_tree);
+ }
+}
+#else
static void fill_token_costs(vp9_coeff_count *c,
vp9_coeff_probs_model *p,
TX_SIZE tx_size) {
int i, j, k, l;
-
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < REF_TYPES; j++)
for (k = 0; k < COEF_BANDS; k++)
@@ -120,6 +146,7 @@ static void fill_token_costs(vp9_coeff_count *c,
vp9_coef_tree);
}
}
+#endif
static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -210,6 +237,20 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
}
}
+#if CONFIG_BALANCED_COEFTREE
+ fill_token_costs(cpi->mb.token_costs[TX_4X4],
+ cpi->mb.token_costs_noskip[TX_4X4],
+ cpi->common.fc.coef_probs_4x4, TX_4X4);
+ fill_token_costs(cpi->mb.token_costs[TX_8X8],
+ cpi->mb.token_costs_noskip[TX_8X8],
+ cpi->common.fc.coef_probs_8x8, TX_8X8);
+ fill_token_costs(cpi->mb.token_costs[TX_16X16],
+ cpi->mb.token_costs_noskip[TX_16X16],
+ cpi->common.fc.coef_probs_16x16, TX_16X16);
+ fill_token_costs(cpi->mb.token_costs[TX_32X32],
+ cpi->mb.token_costs_noskip[TX_32X32],
+ cpi->common.fc.coef_probs_32x32, TX_32X32);
+#else
fill_token_costs(cpi->mb.token_costs[TX_4X4],
cpi->common.fc.coef_probs_4x4, TX_4X4);
fill_token_costs(cpi->mb.token_costs[TX_8X8],
@@ -218,6 +259,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
cpi->common.fc.coef_probs_16x16, TX_16X16);
fill_token_costs(cpi->mb.token_costs[TX_32X32],
cpi->common.fc.coef_probs_32x32, TX_32X32);
+#endif
for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
vp9_cost_tokens(cpi->mb.partition_cost[i],
@@ -225,7 +267,6 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
vp9_partition_tree);
/*rough estimate for costing*/
- cpi->common.kf_ymode_probs_index = cpi->common.base_qindex >> 4;
vp9_init_mode_costs(cpi);
if (cpi->common.frame_type != KEY_FRAME) {
@@ -271,7 +312,13 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
TX_TYPE tx_type = DCT_DCT;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
+#if CONFIG_BALANCED_COEFTREE
+ unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
+ mb->token_costs_noskip[tx_size][type][ref];
+#else
vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
+#endif
+
int seg_eob, default_eob;
uint8_t token_cache[1024];
const uint8_t * band_translate;
@@ -291,8 +338,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
get_tx_type_4x4(xd, block) : DCT_DCT;
above_ec = A[0] != 0;
left_ec = L[0] != 0;
+#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
coef_probs);
+#endif
seg_eob = 16;
scan = get_scan_4x4(tx_type);
band_translate = vp9_coefband_trans_4x4;
@@ -307,8 +356,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
above_ec = (A[0] + A[1]) != 0;
left_ec = (L[0] + L[1]) != 0;
scan = get_scan_8x8(tx_type);
+#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
coef_probs);
+#endif
seg_eob = 64;
band_translate = vp9_coefband_trans_8x8plus;
break;
@@ -320,8 +371,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
scan = get_scan_16x16(tx_type);
+#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
coef_probs);
+#endif
seg_eob = 256;
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
@@ -330,8 +383,10 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
}
case TX_32X32:
scan = vp9_default_scan_32x32;
+#if !CONFIG_BALANCED_COEFTREE
vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
coef_probs);
+#endif
seg_eob = 1024;
above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
@@ -362,18 +417,30 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+#if CONFIG_BALANCED_COEFTREE
+ if (!c || token_cache[scan[c - 1]]) // do not skip eob
+ cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
+ else
+ cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
+#else
cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
-
if (!c || token_cache[scan[c - 1]])
cost += vp9_cost_bit(coef_probs[band][pt][0], 1);
- token_cache[scan[c]] = t;
+#endif
+ token_cache[scan[c]] = vp9_pt_energy_class[t];
}
if (c < seg_eob) {
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+#if CONFIG_BALANCED_COEFTREE
+ cost += mb->token_costs_noskip[tx_size][type][ref]
+ [get_coef_band(band_translate, c)]
+ [pt][DCT_EOB_TOKEN];
+#else
cost += mb->token_costs[tx_size][type][ref]
[get_coef_band(band_translate, c)]
[pt][DCT_EOB_TOKEN];
+#endif
}
}
@@ -556,9 +623,25 @@ static void super_block_yrd(VP9_COMP *cpi,
int64_t txfm_cache[NB_TXFM_MODES]) {
VP9_COMMON *const cm = &cpi->common;
int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB];
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
vp9_subtract_sby(x, bs);
+ if (cpi->speed > 4) {
+ if (bs >= BLOCK_SIZE_SB32X32) {
+ mbmi->txfm_size = TX_32X32;
+ } else if (bs >= BLOCK_SIZE_MB16X16) {
+ mbmi->txfm_size = TX_16X16;
+ } else if (bs >= BLOCK_SIZE_SB8X8) {
+ mbmi->txfm_size = TX_8X8;
+ } else {
+ mbmi->txfm_size = TX_4X4;
+ }
+ super_block_yrd_for_txfm(cm, x, rate, distortion, skip, bs,
+ mbmi->txfm_size);
+ return;
+ }
if (bs >= BLOCK_SIZE_SB32X32)
super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
bs, TX_32X32);
@@ -611,11 +694,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
int64_t this_rd;
int ratey = 0;
- xd->mode_info_context->bmi[ib].as_mode.first = mode;
- if (cm->frame_type == KEY_FRAME)
- rate = bmode_costs[mode];
- else
- rate = x->mbmode_cost[cm->frame_type][mode];
+ rate = bmode_costs[mode];
distortion = 0;
vpx_memcpy(tempa, ta, sizeof(ta));
@@ -653,9 +732,6 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
block, 16), 16) >> 2;
- vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
- dst, xd->plane[0].dst.stride);
-
if (best_tx_type != DCT_DCT)
vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
dst, xd->plane[0].dst.stride, best_tx_type);
@@ -726,16 +802,15 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
int64_t total_rd = 0;
ENTROPY_CONTEXT t_above[4], t_left[4];
int *bmode_costs;
+ MODE_INFO *const mic = xd->mode_info_context;
vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
- xd->mode_info_context->mbmi.mode = I4X4_PRED;
- bmode_costs = mb->inter_bmode_costs;
+ bmode_costs = mb->mbmode_cost;
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
- MODE_INFO *const mic = xd->mode_info_context;
const int mis = xd->mode_info_stride;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
@@ -747,7 +822,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
left_block_mode(mic, i) : DC_PRED;
- bmode_costs = mb->bmode_costs[A][L];
+ bmode_costs = mb->y_mode_costs[A][L];
}
total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
@@ -774,6 +849,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
*Rate = cost;
*rate_y = tot_rate_y;
*Distortion = distortion;
+ xd->mode_info_context->mbmi.mode = mic->bmi[3].as_mode.first;
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
@@ -785,12 +861,13 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t txfm_cache[NB_TXFM_MODES]) {
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
- MACROBLOCKD *xd = &x->e_mbd;
+ MACROBLOCKD *const xd = &x->e_mbd;
int this_rate, this_rate_tokenonly;
int this_distortion, s;
int64_t best_rd = INT64_MAX, this_rd;
TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
int i;
+ int *bmode_costs = x->mbmode_cost;
if (bsize < BLOCK_SIZE_SB8X8) {
x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
@@ -805,17 +882,19 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t local_txfm_cache[NB_TXFM_MODES];
MODE_INFO *const mic = xd->mode_info_context;
const int mis = xd->mode_info_stride;
- const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
- const MB_PREDICTION_MODE L = xd->left_available ?
- left_block_mode(mic, 0) : DC_PRED;
-
- int *bmode_costs = x->bmode_costs[A][L];
+ if (cpi->common.frame_type == KEY_FRAME) {
+ const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
+ const MB_PREDICTION_MODE L = xd->left_available ?
+ left_block_mode(mic, 0) : DC_PRED;
+ bmode_costs = x->y_mode_costs[A][L];
+ }
x->e_mbd.mode_info_context->mbmi.mode = mode;
vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
bsize, local_txfm_cache);
+
this_rate = this_rate_tokenonly + bmode_costs[mode];
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
@@ -925,10 +1004,10 @@ int vp9_cost_mv_ref(VP9_COMP *cpi,
VP9_COMMON *pc = &cpi->common;
vp9_prob p [VP9_MVREFS - 1];
- assert(NEARESTMV <= m && m <= SPLITMV);
+ assert(NEARESTMV <= m && m <= NEWMV);
vp9_mv_ref_probs(pc, p, mode_context);
- return cost_token(vp9_mv_ref_tree, p,
- vp9_mv_ref_encoding_array - NEARESTMV + m);
+ return cost_token(vp9_sb_mv_ref_tree, p,
+ vp9_sb_mv_ref_encoding_array - NEARESTMV + m);
} else
return 0;
}
@@ -938,19 +1017,18 @@ void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
}
-static int labels2mode(MACROBLOCK *x,
- int const *labelings, int which_label,
+static int labels2mode(MACROBLOCK *x, int i,
MB_PREDICTION_MODE this_mode,
int_mv *this_mv, int_mv *this_second_mv,
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
- int_mv seg_mvs[MAX_REF_FRAMES - 1],
+ int_mv seg_mvs[MAX_REF_FRAMES],
int_mv *best_ref_mv,
int_mv *second_best_ref_mv,
int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mode_info_context;
MB_MODE_INFO * mbmi = &mic->mbmi;
- int i, cost = 0, thismvcost = 0;
+ int cost = 0, thismvcost = 0;
int idx, idy;
int bw = 1 << b_width_log2(mbmi->sb_type);
int bh = 1 << b_height_log2(mbmi->sb_type);
@@ -958,72 +1036,61 @@ static int labels2mode(MACROBLOCK *x,
/* We have to be careful retrieving previously-encoded motion vectors.
Ones from this macroblock have to be pulled from the BLOCKD array
as they have not yet made it to the bmi array in our MB_MODE_INFO. */
- for (i = 0; i < 4; ++i) {
- MB_PREDICTION_MODE m;
-
- if (labelings[i] != which_label)
- continue;
+ MB_PREDICTION_MODE m;
- {
- // the only time we should do costing for new motion vector or mode
- // is when we are on a new label (jbb May 08, 2007)
- switch (m = this_mode) {
- case NEWMV:
- if (mbmi->second_ref_frame > 0) {
- this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int;
- this_second_mv->as_int =
- seg_mvs[mbmi->second_ref_frame - 1].as_int;
- }
-
- thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
- 102, xd->allow_high_precision_mv);
- if (mbmi->second_ref_frame > 0) {
- thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
- mvjcost, mvcost, 102,
- xd->allow_high_precision_mv);
- }
- break;
- case NEARESTMV:
- this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame].as_int;
- if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int =
- frame_mv[NEARESTMV][mbmi->second_ref_frame].as_int;
- break;
- case NEARMV:
- this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame].as_int;
- if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int =
- frame_mv[NEARMV][mbmi->second_ref_frame].as_int;
- break;
- case ZEROMV:
- this_mv->as_int = 0;
- if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int = 0;
- break;
- default:
- break;
+ // the only time we should do costing for new motion vector or mode
+ // is when we are on a new label (jbb May 08, 2007)
+ switch (m = this_mode) {
+ case NEWMV:
+ this_mv->as_int = seg_mvs[mbmi->ref_frame].as_int;
+ thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
+ 102, xd->allow_high_precision_mv);
+ if (mbmi->second_ref_frame > 0) {
+ this_second_mv->as_int = seg_mvs[mbmi->second_ref_frame].as_int;
+ thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv,
+ mvjcost, mvcost, 102,
+ xd->allow_high_precision_mv);
}
+ break;
+ case NEARESTMV:
+ this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame].as_int;
+ if (mbmi->second_ref_frame > 0)
+ this_second_mv->as_int =
+ frame_mv[NEARESTMV][mbmi->second_ref_frame].as_int;
+ break;
+ case NEARMV:
+ this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame].as_int;
+ if (mbmi->second_ref_frame > 0)
+ this_second_mv->as_int =
+ frame_mv[NEARMV][mbmi->second_ref_frame].as_int;
+ break;
+ case ZEROMV:
+ this_mv->as_int = 0;
+ if (mbmi->second_ref_frame > 0)
+ this_second_mv->as_int = 0;
+ break;
+ default:
+ break;
+ }
- cost = vp9_cost_mv_ref(cpi, this_mode,
- mbmi->mb_mode_context[mbmi->ref_frame]);
- }
+ cost = vp9_cost_mv_ref(cpi, this_mode,
+ mbmi->mb_mode_context[mbmi->ref_frame]);
- mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
- if (mbmi->second_ref_frame > 0)
- mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
+ mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
+ if (mbmi->second_ref_frame > 0)
+ mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
- x->partition_info->bmi[i].mode = m;
- x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
- if (mbmi->second_ref_frame > 0)
- x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
- for (idy = 0; idy < bh; ++idy) {
- for (idx = 0; idx < bw; ++idx) {
- vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
- &mic->bmi[i], sizeof(mic->bmi[i]));
- vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx],
- &x->partition_info->bmi[i],
- sizeof(x->partition_info->bmi[i]));
- }
+ x->partition_info->bmi[i].mode = m;
+ x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
+ if (mbmi->second_ref_frame > 0)
+ x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
+ for (idy = 0; idy < bh; ++idy) {
+ for (idx = 0; idx < bw; ++idx) {
+ vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
+ &mic->bmi[i], sizeof(mic->bmi[i]));
+ vpx_memcpy(&x->partition_info->bmi[i + idy * 2 + idx],
+ &x->partition_info->bmi[i],
+ sizeof(x->partition_info->bmi[i]));
}
}
@@ -1033,90 +1100,86 @@ static int labels2mode(MACROBLOCK *x,
static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
MACROBLOCK *x,
- int const *labels,
- int which_label,
+ int i,
int *labelyrate,
int *distortion,
ENTROPY_CONTEXT *ta,
ENTROPY_CONTEXT *tl) {
- int i, k;
+ int k;
MACROBLOCKD *xd = &x->e_mbd;
BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
int bwl = b_width_log2(bsize), bw = 1 << bwl;
int bhl = b_height_log2(bsize), bh = 1 << bhl;
int idx, idy;
+ const int src_stride = x->plane[0].src.stride;
+ uint8_t* const src =
+ raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ x->plane[0].src.buf, src_stride);
+ int16_t* src_diff =
+ raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
+ x->plane[0].src_diff);
+ int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
+ uint8_t* const pre =
+ raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
+ uint8_t* const dst =
+ raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ xd->plane[0].dst.buf,
+ xd->plane[0].dst.stride);
+ int thisdistortion = 0;
+ int thisrate = 0;
*labelyrate = 0;
*distortion = 0;
- for (i = 0; i < 4; i++) {
- if (labels[i] == which_label) {
- const int src_stride = x->plane[0].src.stride;
- uint8_t* const src =
- raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- x->plane[0].src.buf, src_stride);
- int16_t* src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
- x->plane[0].src_diff);
- int16_t* coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
- uint8_t* const pre =
- raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- xd->plane[0].pre[0].buf,
- xd->plane[0].pre[0].stride);
- uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride);
- int thisdistortion = 0;
- int thisrate = 0;
-
- vp9_build_inter_predictor(pre,
- xd->plane[0].pre[0].stride,
- dst,
- xd->plane[0].dst.stride,
- &xd->mode_info_context->bmi[i].as_mv[0],
- &xd->scale_factor[0],
- 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix);
-
- // TODO(debargha): Make this work properly with the
- // implicit-compoundinter-weight experiment when implicit
- // weighting for splitmv modes is turned on.
- if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
- uint8_t* const second_pre =
- raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
- xd->plane[0].pre[1].buf,
- xd->plane[0].pre[1].stride);
- vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
- dst, xd->plane[0].dst.stride,
- &xd->mode_info_context->bmi[i].as_mv[1],
- &xd->scale_factor[1], 4 * bw, 4 * bh, 1,
- &xd->subpix);
- }
- vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8,
- src, src_stride,
- dst, xd->plane[0].dst.stride);
+ vp9_build_inter_predictor(pre,
+ xd->plane[0].pre[0].stride,
+ dst,
+ xd->plane[0].dst.stride,
+ &xd->mode_info_context->bmi[i].as_mv[0],
+ &xd->scale_factor[0],
+ 4 * bw, 4 * bh, 0 /* no avg */, &xd->subpix);
+
+ // TODO(debargha): Make this work properly with the
+ // implicit-compoundinter-weight experiment when implicit
+ // weighting for splitmv modes is turned on.
+ if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
+ uint8_t* const second_pre =
+ raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ xd->plane[0].pre[1].buf,
+ xd->plane[0].pre[1].stride);
+ vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
+ dst, xd->plane[0].dst.stride,
+ &xd->mode_info_context->bmi[i].as_mv[1],
+ &xd->scale_factor[1], 4 * bw, 4 * bh, 1,
+ &xd->subpix);
+ }
- k = i;
- for (idy = 0; idy < bh; ++idy) {
- for (idx = 0; idx < bw; ++idx) {
- k += (idy * 2 + idx);
- src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
- x->plane[0].src_diff);
- coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
- x->fwd_txm4x4(src_diff, coeff, 16);
- x->quantize_b_4x4(x, k, DCT_DCT, 16);
- thisdistortion += vp9_block_error(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff,
- k, 16), 16);
- thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
- ta + (k & 1),
- tl + (k >> 1), TX_4X4, 16);
- }
- }
- *distortion += thisdistortion;
- *labelyrate += thisrate;
+ vp9_subtract_block(4 * bh, 4 * bw, src_diff, 8,
+ src, src_stride,
+ dst, xd->plane[0].dst.stride);
+
+ k = i;
+ for (idy = 0; idy < bh; ++idy) {
+ for (idx = 0; idx < bw; ++idx) {
+ k += (idy * 2 + idx);
+ src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, k,
+ x->plane[0].src_diff);
+ coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, k);
+ x->fwd_txm4x4(src_diff, coeff, 16);
+ x->quantize_b_4x4(x, k, DCT_DCT, 16);
+ thisdistortion += vp9_block_error(coeff,
+ BLOCK_OFFSET(xd->plane[0].dqcoeff,
+ k, 16), 16);
+ thisrate += cost_coeffs(cm, x, 0, k, PLANE_TYPE_Y_WITH_DC,
+ ta + (k & 1),
+ tl + (k >> 1), TX_4X4, 16);
}
}
+ *distortion += thisdistortion;
+ *labelyrate += thisrate;
+
*distortion >>= 2;
return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
@@ -1188,11 +1251,45 @@ static enum BlockSize get_block_size(int bw, int bh) {
return -1;
}
+static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
+ MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
+ x->plane[0].src.buf =
+ raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
+ x->plane[0].src.buf,
+ x->plane[0].src.stride);
+ assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
+ x->e_mbd.plane[0].pre[0].buf =
+ raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
+ x->e_mbd.plane[0].pre[0].buf,
+ x->e_mbd.plane[0].pre[0].stride);
+ if (mbmi->second_ref_frame)
+ x->e_mbd.plane[0].pre[1].buf =
+ raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, i,
+ x->e_mbd.plane[0].pre[1].buf,
+ x->e_mbd.plane[0].pre[1].stride);
+}
+
+static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
+ struct buf_2d orig_pre[2]) {
+ MB_MODE_INFO *mbmi = &x->e_mbd.mode_info_context->mbmi;
+ x->plane[0].src = orig_src;
+ x->e_mbd.plane[0].pre[0] = orig_pre[0];
+ if (mbmi->second_ref_frame)
+ x->e_mbd.plane[0].pre[1] = orig_pre[1];
+}
+
+static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize,
+ int_mv *frame_mv,
+ YV12_BUFFER_CONFIG **scaled_ref_frame,
+ int mi_row, int mi_col,
+ int_mv single_newmv[MAX_REF_FRAMES]);
+
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi,
- int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
+ int_mv seg_mvs[4][MAX_REF_FRAMES],
+ int mi_row, int mi_col) {
int i, j;
- static const int labels[4] = { 0, 1, 2, 3 };
int br = 0, bd = 0;
MB_PREDICTION_MODE this_mode;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@@ -1208,7 +1305,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
int bhl = b_height_log2(bsize), bh = 1 << bhl;
int idx, idy;
vp9_variance_fn_ptr_t *v_fn_ptr;
-
+ YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
ENTROPY_CONTEXT t_above[4], t_left[4];
ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
@@ -1255,18 +1352,21 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
int distortion;
int labelyrate;
ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
+ const struct buf_2d orig_src = x->plane[0].src;
+ struct buf_2d orig_pre[2];
+
+ vpx_memcpy(orig_pre, x->e_mbd.plane[0].pre, sizeof(orig_pre));
vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
// motion search for newmv (single predictor case only)
if (mbmi->second_ref_frame <= 0 && this_mode == NEWMV) {
- int sseshift, n;
int step_param = 0;
int further_steps;
int thissme, bestsme = INT_MAX;
- const struct buf_2d orig_src = x->plane[0].src;
- const struct buf_2d orig_pre = x->e_mbd.plane[0].pre[0];
+ int sadpb = x->sadperbit4;
+ int_mv mvp_full;
/* Is the best so far sufficiently good that we cant justify doing
* and new motion search. */
@@ -1287,55 +1387,35 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
- {
- int sadpb = x->sadperbit4;
- int_mv mvp_full;
-
- mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
- mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
-
- // find first label
- n = i;
-
- // adjust src pointer for this segment
- x->plane[0].src.buf =
- raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
- x->plane[0].src.buf,
- x->plane[0].src.stride);
- assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
- x->e_mbd.plane[0].pre[0].buf =
- raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
- x->e_mbd.plane[0].pre[0].buf,
- x->e_mbd.plane[0].pre[0].stride);
-
- bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
- sadpb, further_steps, 0, v_fn_ptr,
- bsi->ref_mv, &mode_mv[NEWMV]);
-
- sseshift = 0;
-
- // Should we do a full search (best quality only)
- if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
- /* Check if mvp_full is within the range. */
- clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
- x->mv_row_min, x->mv_row_max);
-
- thissme = cpi->full_search_sad(x, &mvp_full,
- sadpb, 16, v_fn_ptr,
- x->nmvjointcost, x->mvcost,
- bsi->ref_mv,
- n);
-
- if (thissme < bestsme) {
- bestsme = thissme;
- mode_mv[NEWMV].as_int =
- x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int;
- } else {
- /* The full search result is actually worse so re-instate the
- * previous best vector */
- x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int =
- mode_mv[NEWMV].as_int;
- }
+ mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
+ mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
+
+ // adjust src pointer for this block
+ mi_buf_shift(x, i);
+ bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
+ sadpb, further_steps, 0, v_fn_ptr,
+ bsi->ref_mv, &mode_mv[NEWMV]);
+
+ // Should we do a full search (best quality only)
+ if (cpi->compressor_speed == 0) {
+ /* Check if mvp_full is within the range. */
+ clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
+ x->mv_row_min, x->mv_row_max);
+
+ thissme = cpi->full_search_sad(x, &mvp_full,
+ sadpb, 16, v_fn_ptr,
+ x->nmvjointcost, x->mvcost,
+ bsi->ref_mv, i);
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ mode_mv[NEWMV].as_int =
+ x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int;
+ } else {
+ /* The full search result is actually worse so re-instate the
+ * previous best vector */
+ x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int =
+ mode_mv[NEWMV].as_int;
}
}
@@ -1348,23 +1428,32 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
&distortion, &sse);
// safe motion search result for use in compound prediction
- seg_mvs[i][mbmi->ref_frame - 1].as_int = mode_mv[NEWMV].as_int;
+ seg_mvs[i][mbmi->ref_frame].as_int = mode_mv[NEWMV].as_int;
}
// restore src pointers
- x->plane[0].src = orig_src;
- x->e_mbd.plane[0].pre[0] = orig_pre;
+ mi_buf_restore(x, orig_src, orig_pre);
} else if (mbmi->second_ref_frame > 0 && this_mode == NEWMV) {
- /* NEW4X4 */
- /* motion search not completed? Then skip newmv for this block with
- * comppred */
- if (seg_mvs[i][mbmi->second_ref_frame - 1].as_int == INVALID_MV ||
- seg_mvs[i][mbmi->ref_frame - 1].as_int == INVALID_MV) {
+ if (seg_mvs[i][mbmi->second_ref_frame].as_int == INVALID_MV ||
+ seg_mvs[i][mbmi->ref_frame ].as_int == INVALID_MV)
continue;
+
+ // adjust src pointers
+ mi_buf_shift(x, i);
+ if (cpi->sf.comp_inter_joint_search) {
+ iterative_motion_search(cpi, x, bsize, frame_mv[this_mode],
+ scaled_ref_frame,
+ mi_row, mi_col, seg_mvs[i]);
+ seg_mvs[i][mbmi->ref_frame].as_int =
+ frame_mv[this_mode][mbmi->ref_frame].as_int;
+ seg_mvs[i][mbmi->second_ref_frame].as_int =
+ frame_mv[this_mode][mbmi->second_ref_frame].as_int;
}
+ // restore src pointers
+ mi_buf_restore(x, orig_src, orig_pre);
}
- rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode],
+ rate = labels2mode(x, i, this_mode, &mode_mv[this_mode],
&second_mode_mv[this_mode], frame_mv, seg_mvs[i],
bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
x->mvcost, cpi);
@@ -1381,7 +1470,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
continue;
this_rd = encode_inter_mb_segment(&cpi->common,
- x, labels, i, &labelyrate,
+ x, i, &labelyrate,
&distortion, t_above_s, t_left_s);
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
@@ -1392,10 +1481,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
bestlabelyrate = labelyrate;
mode_selected = this_mode;
best_label_rd = this_rd;
- for (j = 0; j < 4; j++)
- if (labels[j] == i)
- best_eobs[j] = x->e_mbd.plane[0].eobs[j];
-
+ best_eobs[i] = x->e_mbd.plane[0].eobs[i];
vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
}
@@ -1404,7 +1490,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
vpx_memcpy(t_above, t_above_b, sizeof(t_above));
vpx_memcpy(t_left, t_left_b, sizeof(t_left));
- labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected],
+ labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
&second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
x->mvcost, cpi);
@@ -1443,12 +1529,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
}
}
-static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x,
- BEST_SEG_INFO *bsi,
- int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
- rd_check_segment_txsize(cpi, x, bsi, seg_mvs);
-}
-
static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
int_mv *best_ref_mv,
int_mv *second_best_ref_mv,
@@ -1457,7 +1537,8 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
int *returnyrate,
int *returndistortion,
int *skippable, int mvthresh,
- int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
+ int_mv seg_mvs[4][MAX_REF_FRAMES],
+ int mi_row, int mi_col) {
int i;
BEST_SEG_INFO bsi;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@@ -1473,7 +1554,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < 4; i++)
bsi.modes[i] = ZEROMV;
- rd_check_segment(cpi, x, &bsi, seg_mvs);
+ rd_check_segment_txsize(cpi, x, &bsi, seg_mvs, mi_row, mi_col);
/* set it to the best */
for (i = 0; i < 4; i++) {
@@ -1504,6 +1585,7 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
*returndistortion = bsi.d;
*returnyrate = bsi.segment_yrate;
*skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
+ mbmi->mode = bsi.modes[3];
return (int)(bsi.segment_rd);
}
@@ -1878,6 +1960,154 @@ static INLINE int get_switchable_rate(VP9_COMMON *cm, MACROBLOCK *x) {
return SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
}
+static void iterative_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize,
+ int_mv *frame_mv,
+ YV12_BUFFER_CONFIG **scaled_ref_frame,
+ int mi_row, int mi_col,
+ int_mv single_newmv[MAX_REF_FRAMES]) {
+ int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
+ MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+ int refs[2] = { mbmi->ref_frame,
+ (mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame) };
+ int_mv ref_mv[2];
+ const enum BlockSize block_size = get_plane_block_size(bsize, &xd->plane[0]);
+ int ite;
+ // Prediction buffer from second frame.
+ uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
+
+ // Do joint motion search in compound mode to get more accurate mv.
+ struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+ struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
+ struct buf_2d scaled_first_yv12;
+ int last_besterr[2] = {INT_MAX, INT_MAX};
+
+ ref_mv[0] = mbmi->ref_mvs[refs[0]][0];
+ ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
+
+ if (scaled_ref_frame[0]) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[i] = xd->plane[i].pre[0];
+ setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
+ NULL, NULL);
+ }
+
+ if (scaled_ref_frame[1]) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_second_yv12[i] = xd->plane[i].pre[1];
+
+ setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col,
+ NULL, NULL);
+ }
+
+ xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
+ mi_row, mi_col);
+ xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
+ mi_row, mi_col);
+ scaled_first_yv12 = xd->plane[0].pre[0];
+
+ // Initialize mv using single prediction mode result.
+ frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+ frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
+
+ // Allow joint search multiple times iteratively for each ref frame
+ // and break out the search loop if it couldn't find better mv.
+ for (ite = 0; ite < 4; ite++) {
+ struct buf_2d ref_yv12[2];
+ int bestsme = INT_MAX;
+ int sadpb = x->sadperbit16;
+ int_mv tmp_mv;
+ int search_range = 3;
+
+ int tmp_col_min = x->mv_col_min;
+ int tmp_col_max = x->mv_col_max;
+ int tmp_row_min = x->mv_row_min;
+ int tmp_row_max = x->mv_row_max;
+ int id = ite % 2;
+
+ // Initialized here because of compiler problem in Visual Studio.
+ ref_yv12[0] = xd->plane[0].pre[0];
+ ref_yv12[1] = xd->plane[0].pre[1];
+
+ // Get pred block from second frame.
+ vp9_build_inter_predictor(ref_yv12[!id].buf,
+ ref_yv12[!id].stride,
+ second_pred, pw,
+ &frame_mv[refs[!id]],
+ &xd->scale_factor[!id],
+ pw, ph, 0,
+ &xd->subpix);
+
+ // Compound motion search on first ref frame.
+ if (id)
+ xd->plane[0].pre[0] = ref_yv12[id];
+ vp9_clamp_mv_min_max(x, &ref_mv[id]);
+
+ // Use mv result from single mode as mvp.
+ tmp_mv.as_int = frame_mv[refs[id]].as_int;
+
+ tmp_mv.as_mv.col >>= 3;
+ tmp_mv.as_mv.row >>= 3;
+
+ // Small-range full-pixel motion search
+ bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
+ search_range,
+ &cpi->fn_ptr[block_size],
+ x->nmvjointcost, x->mvcost,
+ &ref_mv[id], second_pred,
+ pw, ph);
+
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+
+ if (bestsme < INT_MAX) {
+ int dis; /* TODO: use dis in distortion calculation later. */
+ unsigned int sse;
+
+ bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
+ &ref_mv[id],
+ x->errorperbit,
+ &cpi->fn_ptr[block_size],
+ x->nmvjointcost, x->mvcost,
+ &dis, &sse, second_pred,
+ pw, ph);
+ }
+
+ if (id)
+ xd->plane[0].pre[0] = scaled_first_yv12;
+
+ if (bestsme < last_besterr[id]) {
+ frame_mv[refs[id]].as_int = tmp_mv.as_int;
+ last_besterr[id] = bestsme;
+ } else {
+ break;
+ }
+ }
+
+ // restore the predictor
+ if (scaled_ref_frame[0]) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[0] = backup_yv12[i];
+ }
+
+ if (scaled_ref_frame[1]) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = backup_second_yv12[i];
+ }
+
+ vpx_free(second_pred);
+}
+
static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE_TYPE bsize,
int64_t txfm_cache[],
@@ -1920,145 +2150,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
if (is_comp_pred) {
- if (cpi->sf.comp_inter_joint_serach) {
- int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
- int ite;
- // Prediction buffer from second frame.
- uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
-
- // Do joint motion search in compound mode to get more accurate mv.
- struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
- struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
- struct buf_2d scaled_first_yv12;
- int last_besterr[2] = {INT_MAX, INT_MAX};
-
- if (scaled_ref_frame[0]) {
- int i;
-
- // Swap out the reference frame for a version that's been scaled to
- // match the resolution of the current frame, allowing the existing
- // motion search code to be used without additional modifications.
- for (i = 0; i < MAX_MB_PLANE; i++)
- backup_yv12[i] = xd->plane[i].pre[0];
-
- setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
- NULL, NULL);
- }
-
- if (scaled_ref_frame[1]) {
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; i++)
- backup_second_yv12[i] = xd->plane[i].pre[1];
-
- setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col,
- NULL, NULL);
- }
- xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
- mi_row, mi_col);
- xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
- mi_row, mi_col);
-
- scaled_first_yv12 = xd->plane[0].pre[0];
-
- // Initialize mv using single prediction mode result.
- frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
- frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
-
- // Allow joint search multiple times iteratively for each ref frame
- // and break out the search loop if it couldn't find better mv.
- for (ite = 0; ite < 4; ite++) {
- struct buf_2d ref_yv12[2];
- int bestsme = INT_MAX;
- int sadpb = x->sadperbit16;
- int_mv tmp_mv;
- int search_range = 3;
-
- int tmp_col_min = x->mv_col_min;
- int tmp_col_max = x->mv_col_max;
- int tmp_row_min = x->mv_row_min;
- int tmp_row_max = x->mv_row_max;
- int id = ite % 2;
-
- // Initialized here because of compiler problem in Visual Studio.
- ref_yv12[0] = xd->plane[0].pre[0];
- ref_yv12[1] = xd->plane[0].pre[1];
-
- // Get pred block from second frame.
- vp9_build_inter_predictor(ref_yv12[!id].buf,
- ref_yv12[!id].stride,
- second_pred, pw,
- &frame_mv[refs[!id]],
- &xd->scale_factor[!id],
- pw, ph, 0,
- &xd->subpix);
-
- // Compound motion search on first ref frame.
- if (id)
- xd->plane[0].pre[0] = ref_yv12[id];
- vp9_clamp_mv_min_max(x, &ref_mv[id]);
-
- // Use mv result from single mode as mvp.
- tmp_mv.as_int = frame_mv[refs[id]].as_int;
-
- tmp_mv.as_mv.col >>= 3;
- tmp_mv.as_mv.row >>= 3;
-
- // Small-range full-pixel motion search
- bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
- search_range,
- &cpi->fn_ptr[block_size],
- x->nmvjointcost, x->mvcost,
- &ref_mv[id], second_pred,
- pw, ph);
-
- x->mv_col_min = tmp_col_min;
- x->mv_col_max = tmp_col_max;
- x->mv_row_min = tmp_row_min;
- x->mv_row_max = tmp_row_max;
-
- if (bestsme < INT_MAX) {
- int dis; /* TODO: use dis in distortion calculation later. */
- unsigned int sse;
-
- bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
- &ref_mv[id],
- x->errorperbit,
- &cpi->fn_ptr[block_size],
- x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred,
- pw, ph);
- }
-
- if (id)
- xd->plane[0].pre[0] = scaled_first_yv12;
-
- if (bestsme < last_besterr[id]) {
- frame_mv[refs[id]].as_int =
- xd->mode_info_context->bmi[0].as_mv[1].as_int = tmp_mv.as_int;
- last_besterr[id] = bestsme;
- } else {
- break;
- }
- }
-
- // restore the predictor
- if (scaled_ref_frame[0]) {
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[0] = backup_yv12[i];
- }
+ // Initialize mv using single prediction mode result.
+ frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
+ frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
- if (scaled_ref_frame[1]) {
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[1] = backup_second_yv12[i];
- }
-
- vpx_free(second_pred);
- }
+ if (cpi->sf.comp_inter_joint_search)
+ iterative_motion_search(cpi, x, bsize, frame_mv, scaled_ref_frame,
+ mi_row, mi_col, single_newmv);
if (frame_mv[refs[0]].as_int == INVALID_MV ||
frame_mv[refs[1]].as_int == INVALID_MV)
@@ -2134,8 +2232,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->nmvjointcost, x->mvcost,
&dis, &sse);
}
- frame_mv[refs[0]].as_int =
- xd->mode_info_context->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
+ frame_mv[refs[0]].as_int = tmp_mv.as_int;
single_newmv[refs[0]].as_int = tmp_mv.as_int;
// Add the new motion vector cost to our rolling cost variable
@@ -2191,7 +2288,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
(mbmi->mv[1].as_mv.col & 15) == 0;
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
- if (1) {
+ if (cpi->speed > 4) {
+ *best_filter = EIGHTTAP;
+ } else {
int i, newbest;
int tmp_rate_sum = 0, tmp_dist_sum = 0;
for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
@@ -2328,6 +2427,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Y cost and distortion
super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y,
bsize, txfm_cache);
+
*rate2 += *rate_y;
*distortion += *distortion_y;
@@ -2393,16 +2493,14 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returnrate = rate4x4_y + rate_uv +
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
*returndist = dist4x4_y + (dist_uv >> 2);
- for (i = 0; i < NB_TXFM_MODES; i++) {
- ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]);
- }
+ vpx_memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
} else {
*returnrate = rate_y + rate_uv +
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
*returndist = dist_y + (dist_uv >> 2);
for (i = 0; i < NB_TXFM_MODES; i++) {
- ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]);
+ ctx->txfm_rd_diff[i] = txfm_cache[i] - txfm_cache[cm->txfm_mode];
}
xd->mode_info_context->mbmi.txfm_size = txfm_size;
xd->mode_info_context->mbmi.mode = mode;
@@ -2457,14 +2555,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
cpi->common.y_dc_delta_q);
- int_mv seg_mvs[4][MAX_REF_FRAMES - 1];
+ int_mv seg_mvs[4][MAX_REF_FRAMES];
union b_mode_info best_bmodes[4];
PARTITION_INFO best_partition;
for (i = 0; i < 4; i++) {
int j;
- for (j = 0; j < MAX_REF_FRAMES - 1; j++)
+ for (j = 0; j < MAX_REF_FRAMES; j++)
seg_mvs[i][j].as_int = INVALID_MV;
}
// Everywhere the flag is set the error is much higher than its neighbors.
@@ -2563,11 +2661,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
this_mode = vp9_mode_order[mode_index].mode;
ref_frame = vp9_mode_order[mode_index].ref_frame;
- if (!(ref_frame == INTRA_FRAME
- || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
- continue;
- }
-
if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) {
if (!(ref_frame_mask & (1 << ref_frame))) {
continue;
@@ -2585,6 +2678,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame = ref_frame;
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+ if (!(ref_frame == INTRA_FRAME
+ || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
+ continue;
+ }
+ if (!(mbmi->second_ref_frame == NONE
+ || (cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))) {
+ continue;
+ }
+
// TODO(jingning, jkoleszar): scaling reference frame not supported for
// SPLITMV.
if (mbmi->ref_frame > 0 &&
@@ -2680,8 +2782,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (this_mode == I4X4_PRED) {
int rate;
- // Note the rate value returned here includes the cost of coding
- // the I4X4_PRED mode : x->mbmode_cost[xd->frame_type][I4X4_PRED];
mbmi->txfm_size = TX_4X4;
rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
&distortion_y, INT64_MAX);
@@ -2716,7 +2816,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
skippable = skippable && skip_uv[uv_tx];
mbmi->uv_mode = mode_uv[uv_tx];
- rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv;
+ rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv;
if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
@@ -2755,7 +2855,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
second_ref, INT64_MAX,
&rate, &rate_y, &distortion,
&skippable,
- (int)this_rd_thresh, seg_mvs);
+ (int)this_rd_thresh, seg_mvs,
+ mi_row, mi_col);
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
const int rs = get_switchable_rate(cm, x);
tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
@@ -2794,7 +2895,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
second_ref, INT64_MAX,
&rate, &rate_y, &distortion,
&skippable,
- (int)this_rd_thresh, seg_mvs);
+ (int)this_rd_thresh, seg_mvs,
+ mi_row, mi_col);
} else {
if (cpi->common.mcomp_filter_type == SWITCHABLE) {
int rs = get_switchable_rate(cm, x);
@@ -2843,7 +2945,6 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
compmode_cost =
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred);
- mbmi->mode = this_mode;
} else {
YV12_BUFFER_CONFIG *scaled_ref_frame[2] = {NULL, NULL};
int fb = get_ref_frame_idx(cpi, mbmi->ref_frame);
@@ -2938,14 +3039,16 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_mode = this_mode;
}
- // Store the respective mode distortions for later use.
- if (mode_distortions[this_mode] == -1
- || distortion2 < mode_distortions[this_mode]) {
- mode_distortions[this_mode] = distortion2;
- }
- if (frame_distortions[mbmi->ref_frame] == -1
- || distortion2 < frame_distortions[mbmi->ref_frame]) {
- frame_distortions[mbmi->ref_frame] = distortion2;
+ if (this_mode != I4X4_PRED && this_mode != SPLITMV) {
+ // Store the respective mode distortions for later use.
+ if (mode_distortions[this_mode] == -1
+ || distortion2 < mode_distortions[this_mode]) {
+ mode_distortions[this_mode] = distortion2;
+ }
+ if (frame_distortions[mbmi->ref_frame] == -1
+ || distortion2 < frame_distortions[mbmi->ref_frame]) {
+ frame_distortions[mbmi->ref_frame] = distortion2;
+ }
}
// Did this mode help.. i.e. is it the new best mode
@@ -2954,7 +3057,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Note index of best mode so far
best_mode_index = mode_index;
- if (this_mode <= I4X4_PRED) {
+ if (ref_frame == INTRA_FRAME) {
/* required for left and above block mv */
mbmi->mv[0].as_int = 0;
}
@@ -3052,8 +3155,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Flag all modes that have a distortion thats > 2x the best we found at
// this level.
for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
- if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV
- || mode_index == SPLITMV)
+ if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
continue;
if (mode_distortions[mode_index] > 2 * *returndistortion) {
@@ -3077,7 +3179,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
assert((cm->mcomp_filter_type == SWITCHABLE) ||
(cm->mcomp_filter_type == best_mbmode.interp_filter) ||
- (best_mbmode.mode <= I4X4_PRED));
+ (best_mbmode.ref_frame == INTRA_FRAME));
// Accumulate filter usage stats
// TODO(agrange): Use RD criteria to select interpolation filter mode.
@@ -3129,13 +3231,15 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// macroblock modes
vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
- if (best_mbmode.mode == I4X4_PRED) {
+ if (best_mbmode.ref_frame == INTRA_FRAME &&
+ best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
for (i = 0; i < 4; i++) {
xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
}
}
- if (best_mbmode.mode == SPLITMV) {
+ if (best_mbmode.ref_frame != INTRA_FRAME &&
+ best_mbmode.sb_type < BLOCK_SIZE_SB8X8) {
for (i = 0; i < 4; i++)
xd->mode_info_context->bmi[i].as_mv[0].as_int =
best_bmodes[i].as_mv[0].as_int;