Diffstat (limited to 'vp9/encoder')
-rw-r--r--  vp9/encoder/vp9_bitstream.c            154
-rw-r--r--  vp9/encoder/vp9_block.h                  4
-rw-r--r--  vp9/encoder/vp9_encodeframe.c          169
-rw-r--r--  vp9/encoder/vp9_encodeintra.c           11
-rw-r--r--  vp9/encoder/vp9_encodeintra.h            6
-rw-r--r--  vp9/encoder/vp9_encodemb.c             118
-rw-r--r--  vp9/encoder/vp9_encodemv.c               7
-rw-r--r--  vp9/encoder/vp9_lookahead.c             12
-rw-r--r--  vp9/encoder/vp9_mcomp.c                  5
-rw-r--r--  vp9/encoder/vp9_mcomp.h                  2
-rw-r--r--  vp9/encoder/vp9_modecosts.c              7
-rw-r--r--  vp9/encoder/vp9_onyx_if.c               51
-rw-r--r--  vp9/encoder/vp9_onyx_int.h              17
-rw-r--r--  vp9/encoder/vp9_quantize.c              41
-rw-r--r--  vp9/encoder/vp9_rdopt.c                610
-rw-r--r--  vp9/encoder/vp9_sad_c.c                 62
-rw-r--r--  vp9/encoder/vp9_temporal_filter.c       13
-rw-r--r--  vp9/encoder/vp9_tokenize.c              14
-rw-r--r--  vp9/encoder/vp9_tokenize.h               4
-rw-r--r--  vp9/encoder/vp9_variance.h               2
-rw-r--r--  vp9/encoder/x86/vp9_variance_sse2.c    212
-rw-r--r--  vp9/encoder/x86/vp9_variance_ssse3.c     9
22 files changed, 825 insertions(+), 705 deletions(-)
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index d8839cd14..5916bae2b 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -369,11 +369,6 @@ static void write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) {
write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m);
}
-
-static void write_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
- write_token(bc, vp9_bmode_tree, p, vp9_bmode_encodings + m);
-}
-
static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m);
}
@@ -411,7 +406,6 @@ static int prob_diff_update_savings_search(const unsigned int *ct,
return bestsavings;
}
-#if CONFIG_MODELCOEFPROB
static int prob_diff_update_savings_search_model(const unsigned int *ct,
const vp9_prob *oldp,
vp9_prob *bestp,
@@ -420,7 +414,8 @@ static int prob_diff_update_savings_search_model(const unsigned int *ct,
int i, old_b, new_b, update_b, savings, bestsavings, step;
int newp;
vp9_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES];
- vp9_model_to_full_probs(oldp, b, r, oldplist);
+ vp9_model_to_full_probs(oldp, oldplist);
+ vpx_memcpy(newplist, oldp, sizeof(vp9_prob) * UNCONSTRAINED_NODES);
for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i)
old_b += cost_branch256(ct + 2 * i, oldplist[i]);
old_b += cost_branch256(ct + 2 * PIVOT_NODE, oldplist[PIVOT_NODE]);
@@ -433,7 +428,7 @@ static int prob_diff_update_savings_search_model(const unsigned int *ct,
for (; newp != oldp[PIVOT_NODE]; newp += step) {
if (newp < 1 || newp > 255) continue;
newplist[PIVOT_NODE] = newp;
- vp9_get_model_distribution(newp, newplist, b, r);
+ vp9_model_to_full_probs(newplist, newplist);
for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i)
new_b += cost_branch256(ct + 2 * i, newplist[i]);
new_b += cost_branch256(ct + 2 * PIVOT_NODE, newplist[PIVOT_NODE]);
@@ -448,7 +443,6 @@ static int prob_diff_update_savings_search_model(const unsigned int *ct,
*bestp = bestnewp;
return bestsavings;
}
-#endif
static void vp9_cond_prob_update(vp9_writer *bc, vp9_prob *oldp, vp9_prob upd,
unsigned int *ct) {
@@ -479,25 +473,18 @@ static void pack_mb_tokens(vp9_writer* const bc,
int v = a->value;
int n = a->len;
int ncount = n;
-#if CONFIG_MODELCOEFPROB
vp9_prob probs[ENTROPY_NODES];
-#endif
if (t == EOSB_TOKEN) {
++p;
break;
}
-#if CONFIG_MODELCOEFPROB
if (t >= TWO_TOKEN) {
- vp9_model_to_full_probs(p->context_tree,
- p->block_type, p->ref_type, probs);
+ vp9_model_to_full_probs(p->context_tree, probs);
pp = probs;
} else {
pp = p->context_tree;
}
-#else
- pp = p->context_tree;
-#endif
assert(pp != 0);
/* skip one or two nodes */
@@ -729,26 +716,17 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
active_section = 6;
#endif
-#if CONFIG_AB4X4
if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8)
write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
-#else
- if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
- write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
- else
- write_ymode(bc, mode, pc->fc.ymode_prob);
-#endif
-#if CONFIG_AB4X4
if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
-#else
- if (mode == I4X4_PRED) {
-#endif
- int j = 0;
- do {
- write_bmode(bc, m->bmi[j].as_mode.first,
- pc->fc.bmode_prob);
- } while (++j < 4);
+ int idx, idy;
+ int bw = 1 << b_width_log2(mi->sb_type);
+ int bh = 1 << b_height_log2(mi->sb_type);
+ for (idy = 0; idy < 2; idy += bh)
+ for (idx = 0; idx < 2; idx += bw)
+ write_sb_ymode(bc, m->bmi[idy * 2 + idx].as_mode.first,
+ pc->fc.sb_ymode_prob);
}
write_uv_mode(bc, mi->uv_mode,
pc->fc.uv_mode_prob[mode]);
@@ -763,16 +741,8 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
// If segment skip is not enabled code the mode.
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
-#if CONFIG_AB4X4
if (mi->sb_type >= BLOCK_SIZE_SB8X8)
write_sb_mv_ref(bc, mode, mv_ref_p);
-#else
- if (mi->sb_type > BLOCK_SIZE_SB8X8) {
- write_sb_mv_ref(bc, mode, mv_ref_p);
- } else {
- write_mv_ref(bc, mode, mv_ref_p);
- }
-#endif
vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]);
}
@@ -819,9 +789,6 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
int bwl = b_width_log2(mi->sb_type), bw = 1 << bwl;
int bhl = b_height_log2(mi->sb_type), bh = 1 << bhl;
int idx, idy;
-#if !CONFIG_AB4X4
- bw = 1, bh = 1;
-#endif
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
j = idy * 2 + idx;
@@ -861,20 +828,9 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
}
}
-#if CONFIG_AB4X4
- if (((rf == INTRA_FRAME && mi->sb_type >= BLOCK_SIZE_SB8X8) ||
- (rf != INTRA_FRAME && mi->sb_type >= BLOCK_SIZE_SB8X8)) &&
- pc->txfm_mode == TX_MODE_SELECT &&
- !(skip_coeff || vp9_segfeature_active(xd, segment_id,
- SEG_LVL_SKIP)))
-#else
- if (((rf == INTRA_FRAME && mode != I4X4_PRED) ||
- (rf != INTRA_FRAME && mode != SPLITMV)) &&
- pc->txfm_mode == TX_MODE_SELECT &&
- !(skip_coeff || vp9_segfeature_active(xd, segment_id,
- SEG_LVL_SKIP)))
-#endif
- {
+ if (mi->sb_type >= BLOCK_SIZE_SB8X8 && pc->txfm_mode == TX_MODE_SELECT &&
+ !(rf != INTRA_FRAME &&
+ (skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
TX_SIZE sz = mi->txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
@@ -891,8 +847,8 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
vp9_writer *bc, int mi_row, int mi_col) {
const VP9_COMMON *const c = &cpi->common;
const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- const int mis = c->mode_info_stride;
const int ym = m->mbmi.mode;
+ const int mis = c->mode_info_stride;
const int segment_id = m->mbmi.segment_id;
int skip_coeff;
@@ -906,45 +862,32 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
vp9_write(bc, skip_coeff, vp9_get_pred_prob(c, xd, PRED_MBSKIP));
}
-#if CONFIG_AB4X4
- if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8)
- sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
-#else
- if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
- sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
- else
- kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]);
-#endif
+ if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
+ const B_PREDICTION_MODE A = above_block_mode(m, 0, mis);
+ const B_PREDICTION_MODE L = xd->left_available ?
+ left_block_mode(m, 0) : DC_PRED;
+ write_kf_bmode(bc, ym, c->kf_bmode_prob[A][L]);
+ }
-#if CONFIG_AB4X4
if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8) {
-#else
- if (ym == I4X4_PRED) {
-#endif
- int i = 0;
- do {
- const B_PREDICTION_MODE a = above_block_mode(m, i, mis);
- const B_PREDICTION_MODE l = (xd->left_available ||
- (i & 1)) ?
- left_block_mode(m, i) : B_DC_PRED;
- const int bm = m->bmi[i].as_mode.first;
-
-/*#ifdef ENTROPY_STATS
- ++intra_mode_stats [A] [L] [bm];
-#endif*/
- write_kf_bmode(bc, bm, c->kf_bmode_prob[a][l]);
- } while (++i < 4);
+ int idx, idy;
+ int bw = 1 << b_width_log2(m->mbmi.sb_type);
+ int bh = 1 << b_height_log2(m->mbmi.sb_type);
+ for (idy = 0; idy < 2; idy += bh) {
+ for (idx = 0; idx < 2; idx += bw) {
+ int i = idy * 2 + idx;
+ const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
+ const B_PREDICTION_MODE L = (xd->left_available || idx) ?
+ left_block_mode(m, i) : DC_PRED;
+ write_kf_bmode(bc, m->bmi[i].as_mode.first,
+ c->kf_bmode_prob[A][L]);
+ }
+ }
}
write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
-#if CONFIG_AB4X4
- if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8 && c->txfm_mode == TX_MODE_SELECT &&
- !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
-#else
- if (ym != I4X4_PRED && c->txfm_mode == TX_MODE_SELECT &&
- !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
-#endif
+ if (m->mbmi.sb_type >= BLOCK_SIZE_SB8X8 && c->txfm_mode == TX_MODE_SELECT) {
TX_SIZE sz = m->mbmi.txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
@@ -962,11 +905,9 @@ static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
-#if CONFIG_AB4X4
if (m->mbmi.sb_type < BLOCK_SIZE_SB8X8)
if (xd->ab_index > 0)
return;
-#endif
xd->mode_info_context = m;
set_mi_row_col(&cpi->common, xd, mi_row,
1 << mi_height_log2(m->mbmi.sb_type),
@@ -1019,17 +960,11 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
else
assert(0);
-#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index > 0)
return;
-#endif
-#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8) {
-#else
- if (bsize > BLOCK_SIZE_SB8X8) {
-#endif
int pl;
xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK);
xd->above_seg_context = cm->above_seg_context + mi_col;
@@ -1071,13 +1006,8 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
}
// update partition context
-#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8 &&
(bsize == BLOCK_SIZE_SB8X8 || partition != PARTITION_SPLIT)) {
-#else
- if (bsize > BLOCK_SIZE_SB8X8 &&
- (bsize == BLOCK_SIZE_MB16X16 || partition != PARTITION_SPLIT)) {
-#endif
set_partition_seg_context(cm, xd, mi_row, mi_col);
update_partition_context(xd, subsize, bsize);
}
@@ -1101,7 +1031,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
vpx_memset(c->left_seg_context, 0, sizeof(c->left_seg_context));
for (mi_col = c->cur_tile_mi_col_start;
mi_col < c->cur_tile_mi_col_end;
- mi_col += 8, m += 8)
+ mi_col += 64 / MI_SIZE, m += 64 / MI_SIZE)
write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col,
BLOCK_SIZE_SB64X64);
}
@@ -1219,22 +1149,14 @@ static void update_coef_probs_common(
vp9_coeff_stats *tree_update_hist,
#endif
vp9_coeff_probs *new_frame_coef_probs,
-#if CONFIG_MODELCOEFPROB
vp9_coeff_probs_model *old_frame_coef_probs,
-#else
- vp9_coeff_probs *old_frame_coef_probs,
-#endif
vp9_coeff_stats *frame_branch_ct,
TX_SIZE tx_size) {
int i, j, k, l, t;
int update[2] = {0, 0};
int savings;
-#if CONFIG_MODELCOEFPROB
const int entropy_nodes_update = UNCONSTRAINED_NODES;
-#else
- const int entropy_nodes_update = ENTROPY_NODES;
-#endif
// vp9_prob bestupd = find_coef_update_prob(cpi);
const int tstart = 0;
@@ -1254,13 +1176,11 @@ static void update_coef_probs_common(
if (l >= 3 && k == 0)
continue;
-#if CONFIG_MODELCOEFPROB
if (t == PIVOT_NODE)
s = prob_diff_update_savings_search_model(
frame_branch_ct[i][j][k][l][0],
old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
else
-#endif
s = prob_diff_update_savings_search(
frame_branch_ct[i][j][k][l][t], oldp, &newp, upd);
if (s > 0 && newp != oldp)
@@ -1298,13 +1218,11 @@ static void update_coef_probs_common(
if (l >= 3 && k == 0)
continue;
-#if CONFIG_MODELCOEFPROB
if (t == PIVOT_NODE)
s = prob_diff_update_savings_search_model(
frame_branch_ct[i][j][k][l][0],
old_frame_coef_probs[i][j][k][l], &newp, upd, i, j);
else
-#endif
s = prob_diff_update_savings_search(
frame_branch_ct[i][j][k][l][t],
*oldp, &newp, upd);
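
The rewritten mode packing above replaces the fixed four-iteration loop with a walk over the 4x4/8x4/4x8 sub-blocks of an 8x8 unit, a pattern this change repeats across several files. A minimal self-contained sketch of that walk (function name and log2 arguments are hypothetical, not part of the patch):

#include <stdio.h>

/* An 8x8 unit is a 2x2 grid of 4x4 positions. bw/bh are the sub-block
 * dimensions in units of 4x4, so stepping idx/idy by them visits each
 * sub-block's top-left raster index exactly once. */
void walk_subblocks(int bw_log2, int bh_log2) {
  const int bw = 1 << bw_log2;
  const int bh = 1 << bh_log2;
  int idx, idy;
  for (idy = 0; idy < 2; idy += bh)
    for (idx = 0; idx < 2; idx += bw)
      printf("sub-block index %d\n", idy * 2 + idx);
}

int main(void) {
  walk_subblocks(0, 0);  /* 4x4 sub-blocks: indices 0 1 2 3 */
  walk_subblocks(1, 0);  /* 8x4 sub-blocks: indices 0 2 */
  walk_subblocks(0, 1);  /* 4x8 sub-blocks: indices 0 1 */
  return 0;
}
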
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index d3851b428..84e1a1fdb 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -140,11 +140,9 @@ struct macroblock {
// TODO(jingning): Need to refactor the structure arrays that buffer the
// coding mode decisions of each partition type.
-#if CONFIG_AB4X4
PICK_MODE_CONTEXT ab4x4_context[4][4][4];
PICK_MODE_CONTEXT sb8x4_context[4][4][4];
PICK_MODE_CONTEXT sb4x8_context[4][4][4];
-#endif
PICK_MODE_CONTEXT sb8x8_context[4][4][4];
PICK_MODE_CONTEXT sb8x16_context[4][4][2];
PICK_MODE_CONTEXT sb16x8_context[4][4][2];
@@ -158,9 +156,7 @@ struct macroblock {
PICK_MODE_CONTEXT sb64_context;
int partition_cost[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
-#if CONFIG_AB4X4
BLOCK_SIZE_TYPE b_partitioning[4][4][4];
-#endif
BLOCK_SIZE_TYPE mb_partitioning[4][4];
BLOCK_SIZE_TYPE sb_partitioning[4];
BLOCK_SIZE_TYPE sb64_partitioning;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f3a03f3c8..6c129ebbf 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -503,21 +503,19 @@ static unsigned find_seg_id(uint8_t *buf, BLOCK_SIZE_TYPE bsize,
void vp9_setup_src_planes(MACROBLOCK *x,
const YV12_BUFFER_CONFIG *src,
int mb_row, int mb_col) {
- setup_pred_plane(&x->plane[0].src,
- src->y_buffer, src->y_stride,
- mb_row, mb_col, NULL,
- x->e_mbd.plane[0].subsampling_x,
- x->e_mbd.plane[0].subsampling_y);
- setup_pred_plane(&x->plane[1].src,
- src->u_buffer, src->uv_stride,
- mb_row, mb_col, NULL,
- x->e_mbd.plane[1].subsampling_x,
- x->e_mbd.plane[1].subsampling_y);
- setup_pred_plane(&x->plane[2].src,
- src->v_buffer, src->uv_stride,
- mb_row, mb_col, NULL,
- x->e_mbd.plane[2].subsampling_x,
- x->e_mbd.plane[2].subsampling_y);
+ uint8_t *buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
+ src->alpha_buffer};
+ int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,
+ src->alpha_stride};
+ int i;
+
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ setup_pred_plane(&x->plane[i].src,
+ buffers[i], strides[i],
+ mb_row, mb_col, NULL,
+ x->e_mbd.plane[i].subsampling_x,
+ x->e_mbd.plane[i].subsampling_y);
+ }
}
static void set_offsets(VP9_COMP *cpi,
@@ -621,11 +619,9 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
-#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index != 0)
return;
-#endif
set_offsets(cpi, mi_row, mi_col, bsize);
xd->mode_info_context->mbmi.sb_type = bsize;
@@ -710,14 +706,12 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
case BLOCK_SIZE_SB8X8:
return &x->sb8x8_context[xd->sb_index][xd->mb_index][xd->b_index];
-#if CONFIG_AB4X4
case BLOCK_SIZE_SB8X4:
return &x->sb8x4_context[xd->sb_index][xd->mb_index][xd->b_index];
case BLOCK_SIZE_SB4X8:
return &x->sb4x8_context[xd->sb_index][xd->mb_index][xd->b_index];
case BLOCK_SIZE_AB4X4:
return &x->ab4x4_context[xd->sb_index][xd->mb_index][xd->b_index];
-#endif
default:
assert(0);
return NULL;
@@ -734,10 +728,8 @@ static BLOCK_SIZE_TYPE *get_sb_partitioning(MACROBLOCK *x,
return &x->sb_partitioning[xd->sb_index];
case BLOCK_SIZE_MB16X16:
return &x->mb_partitioning[xd->sb_index][xd->mb_index];
-#if CONFIG_AB4X4
case BLOCK_SIZE_SB8X8:
return &x->b_partitioning[xd->sb_index][xd->mb_index][xd->b_index];
-#endif
default:
assert(0);
return NULL;
@@ -787,11 +779,9 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
if (sub_index != -1)
*(get_sb_index(xd, bsize)) = sub_index;
-#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index > 0)
return;
-#endif
set_offsets(cpi, mi_row, mi_col, bsize);
update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
@@ -818,13 +808,8 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
-#if CONFIG_AB4X4
c1 = BLOCK_SIZE_AB4X4;
- if (bsize >= BLOCK_SIZE_SB8X8)
-#else
- if (bsize > BLOCK_SIZE_SB8X8)
-#endif
- {
+ if (bsize >= BLOCK_SIZE_SB8X8) {
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
c1 = *(get_sb_partitioning(x, bsize));
@@ -833,13 +818,8 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
bwl = b_width_log2(c1), bhl = b_height_log2(c1);
if (bsl == bwl && bsl == bhl) {
-#if CONFIG_AB4X4
if (output_enabled && bsize >= BLOCK_SIZE_SB8X8)
cpi->partition_count[pl][PARTITION_NONE]++;
-#else
- if (output_enabled && bsize > BLOCK_SIZE_SB8X8)
- cpi->partition_count[pl][PARTITION_NONE]++;
-#endif
encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1);
} else if (bsl == bhl && bsl > bwl) {
if (output_enabled)
@@ -870,13 +850,8 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
}
}
-#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8 &&
(bsize == BLOCK_SIZE_SB8X8 || bsl == bwl || bsl == bhl)) {
-#else
- if (bsize > BLOCK_SIZE_SB8X8 &&
- (bsize == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) {
-#endif
set_partition_seg_context(cm, xd, mi_row, mi_col);
update_partition_context(xd, c1, bsize);
}
@@ -902,14 +877,12 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
BLOCK_SIZE_TYPE subsize;
int srate = INT_MAX, sdist = INT_MAX;
-#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8)
if (xd->ab_index != 0) {
*rate = 0;
*dist = 0;
return;
}
-#endif
assert(mi_height_log2(bsize) == mi_width_log2(bsize));
// buffer the above/left context information of the block in search.
@@ -927,11 +900,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
sizeof(PARTITION_CONTEXT) * ms);
// PARTITION_SPLIT
-#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8) {
-#else
- if (bsize >= BLOCK_SIZE_MB16X16) {
-#endif
int r4 = 0, d4 = 0;
subsize = get_subsize(bsize, PARTITION_SPLIT);
*(get_sb_partitioning(x, bsize)) = subsize;
@@ -953,12 +922,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
-#if CONFIG_AB4X4
if (r4 < INT_MAX)
r4 += x->partition_cost[pl][PARTITION_SPLIT];
-#else
- r4 += x->partition_cost[pl][PARTITION_SPLIT];
-#endif
assert(r4 >= 0);
assert(d4 >= 0);
srate = r4;
@@ -968,11 +933,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
// PARTITION_HORZ
if ((mi_col + ms <= cm->mi_cols) && (mi_row + (ms >> 1) <= cm->mi_rows) &&
-#if CONFIG_AB4X4
(bsize >= BLOCK_SIZE_SB8X8)) {
-#else
- (bsize >= BLOCK_SIZE_MB16X16)) {
-#endif
int r2, d2;
int mb_skip = 0;
subsize = get_subsize(bsize, PARTITION_HORZ);
@@ -995,12 +956,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
-#if CONFIG_AB4X4
if (r2 < INT_MAX)
r2 += x->partition_cost[pl][PARTITION_HORZ];
-#else
- r2 += x->partition_cost[pl][PARTITION_HORZ];
-#endif
if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
srate = r2;
@@ -1012,11 +969,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
// PARTITION_VERT
if ((mi_row + ms <= cm->mi_rows) && (mi_col + (ms >> 1) <= cm->mi_cols) &&
-#if CONFIG_AB4X4
(bsize >= BLOCK_SIZE_SB8X8)) {
-#else
- (bsize >= BLOCK_SIZE_MB16X16)) {
-#endif
int r2, d2;
int mb_skip = 0;
subsize = get_subsize(bsize, PARTITION_VERT);
@@ -1038,12 +991,8 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
}
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
-#if CONFIG_AB4X4
if (r2 < INT_MAX)
r2 += x->partition_cost[pl][PARTITION_VERT];
-#else
- r2 += x->partition_cost[pl][PARTITION_VERT];
-#endif
if ((RDCOST(x->rdmult, x->rddiv, r2, d2) <
RDCOST(x->rdmult, x->rddiv, srate, sdist)) && !mb_skip) {
srate = r2;
@@ -1058,11 +1007,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
int r, d;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, bsize,
get_block_context(x, bsize));
-#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8) {
-#else
- if (bsize >= BLOCK_SIZE_MB16X16) {
-#endif
set_partition_seg_context(cm, xd, mi_row, mi_col);
pl = partition_plane_context(xd, bsize);
r += x->partition_cost[pl][PARTITION_NONE];
@@ -1072,11 +1017,7 @@ static void rd_pick_partition(VP9_COMP *cpi, TOKENEXTRA **tp,
RDCOST(x->rdmult, x->rddiv, srate, sdist)) {
srate = r;
sdist = d;
-#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8)
-#else
- if (bsize >= BLOCK_SIZE_MB16X16)
-#endif
*(get_sb_partitioning(x, bsize)) = bsize;
}
}
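
Each partition candidate above is kept only when it lowers the combined rate-distortion cost computed by RDCOST(x->rdmult, x->rddiv, rate, dist). A generic sketch of that comparison (the exact scaling inside libvpx's RDCOST macro is not shown in this hunk, so the constants below are assumptions):

#include <stdint.h>
#include <stdio.h>

/* Weigh rate (bits, in Q8 cost units) by a lambda-like multiplier,
 * rescale, and add distortion; smaller is better. */
int64_t rd_cost_sketch(int rdmult, int rate, int64_t dist) {
  return ((128 + (int64_t)rate * rdmult) >> 8) + dist;
}

int main(void) {
  /* A lower-rate candidate wins only if its distortion penalty does
   * not outweigh the bits saved: 600 < 700 here, so this prints 1. */
  printf("%d\n", rd_cost_sketch(256, 100, 500) <
                 rd_cost_sketch(256, 300, 400));
  return 0;
}
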
@@ -1260,6 +1201,8 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vpx_memset(cpi->txfm_count_16x16p, 0, sizeof(cpi->txfm_count_16x16p));
vpx_memset(cpi->txfm_count_8x8p, 0, sizeof(cpi->txfm_count_8x8p));
vpx_memset(cpi->rd_tx_select_diff, 0, sizeof(cpi->rd_tx_select_diff));
+ vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes));
+
{
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
@@ -1514,7 +1457,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
} else
txfm_type = ALLOW_8X8;
#else
- txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >=
+ txfm_type = cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >
cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
ALLOW_32X32 : TX_MODE_SELECT;
#endif
@@ -1603,22 +1546,22 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
-#if CONFIG_AB4X4
if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB8X8) {
-#else
- if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_SB8X8) {
-#endif
++cpi->sb_ymode_count[m];
} else {
++cpi->ymode_count[m];
}
++cpi->y_uv_mode_count[m][uvm];
if (m == I4X4_PRED) {
- int b = 0;
- do {
- int m = xd->mode_info_context->bmi[b].as_mode.first;
- ++cpi->bmode_count[m];
- } while (++b < 4);
+ int idx, idy;
+ int bw = 1 << b_width_log2(xd->mode_info_context->mbmi.sb_type);
+ int bh = 1 << b_height_log2(xd->mode_info_context->mbmi.sb_type);
+ for (idy = 0; idy < 2; idy += bh) {
+ for (idx = 0; idx < 2; idx += bw) {
+ int m = xd->mode_info_context->bmi[idy * 2 + idx].as_mode.first;
+ ++cpi->sb_ymode_count[m];
+ }
+ }
}
}
@@ -1693,22 +1636,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
vp9_update_zbin_extra(cpi, x);
}
-#if CONFIG_AB4X4
- if (mbmi->ref_frame == INTRA_FRAME &&
- bsize < BLOCK_SIZE_SB8X8) {
-#else
- if (mbmi->mode == I4X4_PRED) {
- assert(bsize == BLOCK_SIZE_SB8X8 && mbmi->txfm_size == TX_4X4);
-#endif
- vp9_encode_intra4x4mby(x, BLOCK_SIZE_SB8X8);
- vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_SB8X8);
- vp9_encode_sbuv(cm, x, BLOCK_SIZE_SB8X8);
-
- if (output_enabled)
- sum_intra_stats(cpi, x);
- } else if (mbmi->ref_frame == INTRA_FRAME) {
- vp9_build_intra_predictors_sby_s(xd, bsize);
- vp9_build_intra_predictors_sbuv_s(xd, bsize);
+ if (mbmi->ref_frame == INTRA_FRAME) {
+ vp9_encode_intra_block_y(cm, x, (bsize < BLOCK_SIZE_SB8X8) ?
+ BLOCK_SIZE_SB8X8 : bsize);
+ vp9_encode_intra_block_uv(cm, x, (bsize < BLOCK_SIZE_SB8X8) ?
+ BLOCK_SIZE_SB8X8 : bsize);
if (output_enabled)
sum_intra_stats(cpi, x);
} else {
@@ -1730,14 +1662,9 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
: bsize);
}
-#if CONFIG_AB4X4
- if (mbmi->ref_frame == INTRA_FRAME &&
- bsize < BLOCK_SIZE_SB8X8) {
-#else
- if (mbmi->mode == I4X4_PRED) {
- assert(bsize == BLOCK_SIZE_SB8X8);
-#endif
- vp9_tokenize_sb(cpi, xd, t, !output_enabled, BLOCK_SIZE_SB8X8);
+ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+ vp9_tokenize_sb(cpi, xd, t, !output_enabled,
+ (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
} else if (!x->skip) {
vp9_encode_sb(cm, x, (bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 : bsize);
vp9_tokenize_sb(cpi, xd, t, !output_enabled,
@@ -1764,8 +1691,9 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
if (output_enabled) {
if (cm->txfm_mode == TX_MODE_SELECT &&
- !(mbmi->mb_skip_coeff ||
- vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
+ mbmi->sb_type >= BLOCK_SIZE_SB8X8 &&
+ !(mbmi->ref_frame != INTRA_FRAME && (mbmi->mb_skip_coeff ||
+ vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
if (bsize >= BLOCK_SIZE_SB32X32) {
cpi->txfm_count_32x32p[mbmi->txfm_size]++;
} else if (bsize >= BLOCK_SIZE_MB16X16) {
@@ -1776,18 +1704,19 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
} else {
int x, y;
TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode;
-
- if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32)
- sz = TX_16X16;
- if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16)
- sz = TX_8X8;
-#if CONFIG_AB4X4
- if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8)
-#else
- if (sz == TX_8X8 && (mbmi->mode == SPLITMV ||
- mbmi->mode == I4X4_PRED))
-#endif
+ // The new intra coding scheme requires no change of transform size
+ if (mi->mbmi.ref_frame != INTRA_FRAME) {
+ if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32)
+ sz = TX_16X16;
+ if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16)
+ sz = TX_8X8;
+ if (sz == TX_8X8 && bsize < BLOCK_SIZE_SB8X8)
+ sz = TX_4X4;
+ } else if (bsize >= BLOCK_SIZE_SB8X8) {
+ sz = mbmi->txfm_size;
+ } else {
sz = TX_4X4;
+ }
for (y = 0; y < bh; y++) {
for (x = 0; x < bw; x++) {
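
The hunk above clamps the frame-level transform size down to what the prediction block can hold, and pins intra sub-8x8 blocks to 4x4. A compact sketch of the same clamp using hypothetical enums (the real code uses TX_SIZE and BLOCK_SIZE_TYPE):

/* Shrink the requested transform until it fits inside the block: a
 * 32x32 transform cannot cover a 16x16 block, and so on down. */
enum tx_sz { T4X4, T8X8, T16X16, T32X32 };
enum blk_sz { B4X4, B8X8, B16X16, B32X32, B64X64 };

enum tx_sz clamp_tx(enum tx_sz sz, enum blk_sz bsize) {
  if (sz == T32X32 && bsize < B32X32) sz = T16X16;
  if (sz == T16X16 && bsize < B16X16) sz = T8X8;
  if (sz == T8X8 && bsize < B8X8) sz = T4X4;  /* sub-8x8 falls to 4x4 */
  return sz;
}
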
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index f8cf50f84..91866b28f 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -80,15 +80,6 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib,
}
}
-void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bsize) {
- int i;
- int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
- int bc = 1 << (bwl + bhl);
-
- for (i = 0; i < bc; i++)
- encode_intra4x4block(mb, i, bsize);
-}
-
void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
@@ -102,3 +93,5 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) {
vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
vp9_encode_sbuv(cm, x, BLOCK_SIZE_MB16X16);
}
+
+
diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h
index c26200494..7da164c6a 100644
--- a/vp9/encoder/vp9_encodeintra.h
+++ b/vp9/encoder/vp9_encodeintra.h
@@ -16,5 +16,9 @@
int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x);
-void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bs);
+void vp9_encode_intra_block_y(VP9_COMMON *const cm, MACROBLOCK *mb,
+ BLOCK_SIZE_TYPE bs);
+void vp9_encode_intra_block_uv(VP9_COMMON *const cm, MACROBLOCK *mb,
+ BLOCK_SIZE_TYPE bs);
+
#endif // VP9_ENCODER_VP9_ENCODEINTRA_H_
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 84b350792..3f2061c64 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -44,7 +44,6 @@ static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int plane) {
const uint8_t *src = x->plane[plane].src.buf;
const int src_stride = x->plane[plane].src.stride;
- assert(plane < 3);
vp9_subtract_block(bh, bw,
x->plane[plane].src_diff, bw, src, src_stride,
xd->plane[plane].dst.buf, xd->plane[plane].dst.stride);
@@ -168,7 +167,7 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
break;
}
case TX_32X32:
- scan = vp9_default_zig_zag1d_32x32;
+ scan = vp9_default_scan_32x32;
default_eob = 1024;
band_translate = vp9_coefband_trans_8x8plus;
break;
@@ -605,3 +604,118 @@ void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x,
foreach_transformed_block(xd, bsize, encode_block, &arg);
}
+
+static void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
+ int ss_txfrm_size, void *arg) {
+ struct encode_b_args* const args = arg;
+ MACROBLOCK* const x = args->x;
+ MACROBLOCKD* const xd = &x->e_mbd;
+ const TX_SIZE tx_size = (TX_SIZE)(ss_txfrm_size / 2);
+ const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
+ const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
+ block, ss_txfrm_size);
+ uint8_t* const src =
+ raster_block_offset_uint8(xd, bsize, plane, raster_block,
+ x->plane[plane].src.buf,
+ x->plane[plane].src.stride);
+ uint8_t* const dst =
+ raster_block_offset_uint8(xd, bsize, plane, raster_block,
+ xd->plane[plane].dst.buf,
+ xd->plane[plane].dst.stride);
+ int16_t* const src_diff =
+ raster_block_offset_int16(xd, bsize, plane,
+ raster_block, x->plane[plane].src_diff);
+
+ const int txfm_b_size = 4 << tx_size;
+ int ib = raster_block;
+ int tx_ib = ib >> tx_size;
+ int plane_b_size;
+
+ TX_TYPE tx_type;
+ int mode, b_mode;
+
+ mode = plane == 0 ? xd->mode_info_context->mbmi.mode :
+ xd->mode_info_context->mbmi.uv_mode;
+ if (bsize <= BLOCK_SIZE_SB8X8 && mode == I4X4_PRED && plane == 0)
+ b_mode = xd->mode_info_context->bmi[ib].as_mode.first;
+ else
+ b_mode = mode;
+
+ assert(b_mode >= B_DC_PRED && b_mode <= B_TM_PRED);
+
+ plane_b_size = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
+ vp9_predict_intra_block(xd, tx_ib, plane_b_size, tx_size, b_mode,
+ dst, xd->plane[plane].dst.stride);
+ vp9_subtract_block(txfm_b_size, txfm_b_size,
+ src_diff, bw,
+ src, x->plane[plane].src.stride,
+ dst, xd->plane[plane].dst.stride);
+
+ xform_quant(plane, block, bsize, ss_txfrm_size, arg);
+
+ /*
+ if (x->optimize)
+ vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx);
+ */
+ switch (ss_txfrm_size / 2) {
+ case TX_32X32:
+ vp9_short_idct32x32_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
+ block, 16), dst, xd->plane[plane].dst.stride);
+ break;
+ case TX_16X16:
+ tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT;
+ if (tx_type == DCT_DCT) {
+ vp9_short_idct16x16_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
+ block, 16), dst, xd->plane[plane].dst.stride);
+ } else {
+ vp9_short_iht16x16_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
+ block, 16), dst, xd->plane[plane].dst.stride,
+ tx_type);
+ }
+ break;
+ case TX_8X8:
+ tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT;
+ if (tx_type == DCT_DCT) {
+ vp9_short_idct8x8_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
+ block, 16), dst, xd->plane[plane].dst.stride);
+ } else {
+ vp9_short_iht8x8_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff,
+ block, 16), dst, xd->plane[plane].dst.stride,
+ tx_type);
+ }
+ break;
+ case TX_4X4:
+ tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
+ if (tx_type == DCT_DCT) {
+ // this is like vp9_short_idct4x4 but has a special case around eob<=1
+ // which is significant (not just an optimization) for the lossless
+ // case.
+ vp9_inverse_transform_b_4x4_add(xd, xd->plane[plane].eobs[block],
+ BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), dst,
+ xd->plane[plane].dst.stride);
+ } else {
+ vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
+ dst, xd->plane[plane].dst.stride, tx_type);
+ }
+ break;
+ }
+}
+
+void vp9_encode_intra_block_y(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ MACROBLOCKD* const xd = &x->e_mbd;
+ struct optimize_ctx ctx;
+ struct encode_b_args arg = {cm, x, &ctx};
+
+ foreach_transformed_block_in_plane(xd, bsize, 0,
+ encode_block_intra, &arg);
+}
+void vp9_encode_intra_block_uv(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ MACROBLOCKD* const xd = &x->e_mbd;
+ struct optimize_ctx ctx;
+ struct encode_b_args arg = {cm, x, &ctx};
+
+ foreach_transformed_block_uv(xd, bsize, encode_block_intra, &arg);
+}
+
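
vp9_encode_intra_block_y/uv above funnel every transform block through a single callback handed to the foreach_transformed_block iterators. A stripped-down sketch of that visitor idiom (signatures are hypothetical; the real callbacks also receive the block size and transform size):

#include <stdio.h>

/* The iterator owns the block walk, the callback owns the per-block
 * work, and a void* threads shared state between the two. */
typedef void (*block_visitor)(int plane, int block, void *arg);

struct stats { int blocks; };

void count_block(int plane, int block, void *arg) {
  struct stats *s = (struct stats *)arg;
  (void)plane;
  (void)block;
  s->blocks++;
}

void foreach_block(int plane, int nblocks, block_visitor visit, void *arg) {
  int b;
  for (b = 0; b < nblocks; b++)
    visit(plane, b, arg);
}

int main(void) {
  struct stats s = { 0 };
  foreach_block(0, 4, count_block, &s);  /* e.g. four 4x4 blocks in 8x8 */
  printf("%d blocks visited\n", s.blocks);
  return 0;
}
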
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index e2cd8838c..1bb7fa88d 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -573,16 +573,9 @@ void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x,
int bhl = b_height_log2(mbmi->sb_type), bh = 1 << bhl;
int idx, idy;
-#if CONFIG_AB4X4
if (mbmi->sb_type < BLOCK_SIZE_SB8X8) {
-#else
- if (mbmi->mode == SPLITMV) {
-#endif
int i;
PARTITION_INFO *pi = x->partition_info;
-#if !CONFIG_AB4X4
- bw = 1, bh = 1;
-#endif
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
i = idy * 2 + idx;
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index 708fe4549..b07d92a44 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -84,20 +84,27 @@ bail:
return NULL;
}
+#define USE_PARTIAL_COPY 0
int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
int64_t ts_start, int64_t ts_end, unsigned int flags,
unsigned char *active_map) {
struct lookahead_entry *buf;
+#if USE_PARTIAL_COPY
int row, col, active_end;
int mb_rows = (src->y_height + 15) >> 4;
int mb_cols = (src->y_width + 15) >> 4;
+#endif
if (ctx->sz + 1 > ctx->max_sz)
return 1;
ctx->sz++;
buf = pop(ctx, &ctx->write_idx);
+#if USE_PARTIAL_COPY
+ // TODO(jkoleszar): This is disabled for now, as
+ // vp9_copy_and_extend_frame_with_rect is not subsampling/alpha aware.
+
// Only do this partial copy if the following conditions are all met:
// 1. Lookahead queue has a size of 1.
// 2. Active map is provided.
@@ -140,6 +147,11 @@ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
} else {
vp9_copy_and_extend_frame(src, &buf->img);
}
+#else
+ // Partial copy not implemented yet
+ vp9_copy_and_extend_frame(src, &buf->img);
+#endif
+
buf->ts_start = ts_start;
buf->ts_end = ts_end;
buf->flags = flags;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 75e6e6757..2e99736ce 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -414,7 +414,6 @@ int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x,
return besterr;
}
-#if CONFIG_COMP_INTER_JOINT_SEARCH
#undef DIST
/* returns subpixel variance error function */
#define DIST(r, c) \
@@ -606,7 +605,7 @@ int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
return besterr;
}
-#endif // CONFIG_COMP_INTER_JOINT_SEARCH
+
#undef MVC
#undef PRE
@@ -2327,7 +2326,6 @@ int vp9_refining_search_sadx4(MACROBLOCK *x,
return INT_MAX;
}
-#if CONFIG_COMP_INTER_JOINT_SEARCH
/* This function is called when we do joint motion search in comp_inter_inter
* mode.
*/
@@ -2429,4 +2427,3 @@ int vp9_refining_search_8p_c(MACROBLOCK *x,
return INT_MAX;
}
}
-#endif // CONFIG_COMP_INTER_JOINT_SEARCH
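
The joint search un-gated above refines one motion vector while holding the prediction from the second reference fixed; the two predictions are combined by per-pixel averaging before the error is measured (see the second_pred argument to vp9_refining_search_8p_c). A sketch of only the core averaging step, with rounding to nearest (libvpx's own helper additionally handles strides and block geometry):

#include <stdint.h>

/* Average two predictors; this compound prediction is what the
 * 8-point refining search scores against the source block. */
void avg_pred_sketch(const uint8_t *p1, const uint8_t *p2,
                     uint8_t *out, int n) {
  int i;
  for (i = 0; i < n; i++)
    out[i] = (uint8_t)((p1[i] + p2[i] + 1) >> 1);
}
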
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 33e688b97..28b2efd28 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -75,7 +75,6 @@ typedef int (*vp9_diamond_search_fn_t)(MACROBLOCK *x,
int *mvjcost, int *mvcost[2],
int_mv *center_mv);
-#if CONFIG_COMP_INTER_JOINT_SEARCH
int vp9_find_best_sub_pixel_comp(MACROBLOCK *x,
int_mv *bestmv, int_mv *ref_mv,
int error_per_bit,
@@ -91,5 +90,4 @@ int vp9_refining_search_8p_c(MACROBLOCK *x,
int *mvjcost, int *mvcost[2],
int_mv *center_mv, const uint8_t *second_pred,
int w, int h);
-#endif // CONFIG_COMP_INTER_JOINT_SEARCH
#endif // VP9_ENCODER_VP9_MCOMP_H_
diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c
index e26daf0c9..e58ff40d9 100644
--- a/vp9/encoder/vp9_modecosts.c
+++ b/vp9/encoder/vp9_modecosts.c
@@ -33,10 +33,11 @@ void vp9_init_mode_costs(VP9_COMP *c) {
x->fc.sub_mv_ref_prob[0], vp9_sub_mv_ref_tree);
// TODO(rbultje) separate tables for superblock costing?
- vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp9_ymode_tree);
+ vp9_cost_tokens(c->mb.mbmode_cost[1], x->fc.sb_ymode_prob,
+ vp9_sb_ymode_tree);
vp9_cost_tokens(c->mb.mbmode_cost[0],
- x->kf_ymode_prob[c->common.kf_ymode_probs_index],
- vp9_kf_ymode_tree);
+ x->sb_kf_ymode_prob[c->common.kf_ymode_probs_index],
+ vp9_sb_ymode_tree);
vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index e5c4761cc..27eeb5c82 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -775,6 +775,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->optimize_coefficients = !cpi->oxcf.lossless;
sf->first_step = 0;
sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
+ sf->comp_inter_joint_search = 1;
#if CONFIG_MULTIPLE_ARF
// Switch segmentation off.
sf->static_segmentation = 0;
@@ -785,7 +786,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->static_segmentation = 0;
#endif
#endif
- sf->splitmode_breakout = 0;
sf->mb16_breakout = 0;
switch (mode) {
@@ -804,13 +804,13 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->static_segmentation = 0;
#endif
#endif
- sf->splitmode_breakout = 1;
sf->mb16_breakout = 0;
if (speed > 0) {
/* Disable coefficient optimization above speed 0 */
sf->optimize_coefficients = 0;
sf->no_skip_block4x4_search = 0;
+ sf->comp_inter_joint_search = 0;
sf->first_step = 1;
@@ -1636,12 +1636,12 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
BFP(BLOCK_8X4, vp9_sad8x4, vp9_variance8x4, vp9_sub_pixel_variance8x4,
vp9_sub_pixel_avg_variance8x4, NULL, NULL,
- NULL, NULL, NULL,
+ NULL, NULL, vp9_sad8x4x8,
vp9_sad8x4x4d)
BFP(BLOCK_4X8, vp9_sad4x8, vp9_variance4x8, vp9_sub_pixel_variance4x8,
vp9_sub_pixel_avg_variance4x8, NULL, NULL,
- NULL, NULL, NULL,
+ NULL, NULL, vp9_sad4x8x8,
vp9_sad4x8x4d)
BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
@@ -2083,6 +2083,18 @@ void vp9_write_yuv_rec_frame(VP9_COMMON *cm) {
fwrite(src, s->uv_width, 1, yuv_rec_file);
src += s->uv_stride;
} while (--h);
+
+#if CONFIG_ALPHA
+ if (s->alpha_buffer) {
+ src = s->alpha_buffer;
+ h = s->alpha_height;
+ do {
+ fwrite(src, s->alpha_width, 1, yuv_rec_file);
+ src += s->alpha_stride;
+ } while (--h);
+ }
+#endif
+
fflush(yuv_rec_file);
}
#endif
@@ -2095,11 +2107,15 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
const int out_h = dst_fb->y_crop_height;
int x, y, i;
- uint8_t *srcs[3] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer};
- int src_strides[3] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride};
+ uint8_t *srcs[4] = {src_fb->y_buffer, src_fb->u_buffer, src_fb->v_buffer,
+ src_fb->alpha_buffer};
+ int src_strides[4] = {src_fb->y_stride, src_fb->uv_stride, src_fb->uv_stride,
+ src_fb->alpha_stride};
- uint8_t *dsts[3] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer};
- int dst_strides[3] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride};
+ uint8_t *dsts[4] = {dst_fb->y_buffer, dst_fb->u_buffer, dst_fb->v_buffer,
+ dst_fb->alpha_buffer};
+ int dst_strides[4] = {dst_fb->y_stride, dst_fb->uv_stride, dst_fb->uv_stride,
+ dst_fb->alpha_stride};
for (y = 0; y < out_h; y += 16) {
for (x = 0; x < out_w; x += 16) {
@@ -2791,6 +2807,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
#endif
loop_count = 0;
+ vpx_memset(cpi->rd_tx_select_threshes, 0, sizeof(cpi->rd_tx_select_threshes));
if (cm->frame_type != KEY_FRAME) {
/* TODO: Decide this more intelligently */
@@ -2919,11 +2936,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
#endif
// transform / motion compensation build reconstruction frame
-#if CONFIG_MODELCOEFPROB
if (cm->frame_type == KEY_FRAME) {
vp9_default_coef_probs(cm);
}
-#endif
vp9_encode_frame(cpi);
@@ -3153,6 +3168,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Pick the loop filter level for the frame.
loopfilter_frame(cpi, cm);
+#if WRITE_RECON_BUFFER
+ if (cm->show_frame)
+ write_cx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame + 2000);
+ else
+ write_cx_frame_to_file(cm->frame_to_show,
+ cm->current_video_frame + 3000);
+#endif
+
// build the bitstream
cpi->dummy_packing = 0;
vp9_pack_bitstream(cpi, dest, size);
@@ -3172,7 +3196,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
release_scaled_references(cpi);
update_reference_frames(cpi);
-#if CONFIG_MODELCOEFPROB
vp9_full_to_model_counts(cpi->common.fc.coef_counts_4x4,
cpi->coef_counts_4x4);
vp9_full_to_model_counts(cpi->common.fc.coef_counts_8x8,
@@ -3181,12 +3204,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
cpi->coef_counts_16x16);
vp9_full_to_model_counts(cpi->common.fc.coef_counts_32x32,
cpi->coef_counts_32x32);
-#else
- vp9_copy(cpi->common.fc.coef_counts_4x4, cpi->coef_counts_4x4);
- vp9_copy(cpi->common.fc.coef_counts_8x8, cpi->coef_counts_8x8);
- vp9_copy(cpi->common.fc.coef_counts_16x16, cpi->coef_counts_16x16);
- vp9_copy(cpi->common.fc.coef_counts_32x32, cpi->coef_counts_32x32);
-#endif
if (!cpi->common.error_resilient_mode &&
!cpi->common.frame_parallel_decoding_mode) {
vp9_adapt_coef_probs(&cpi->common);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 57d19ca63..e3e95eda9 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -76,17 +76,10 @@ typedef struct {
// 0 = I4X4_PRED, ZERO_MV, MV, SPLIT
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
-#if CONFIG_MODELCOEFPROB
vp9_coeff_probs_model coef_probs_4x4[BLOCK_TYPES];
vp9_coeff_probs_model coef_probs_8x8[BLOCK_TYPES];
vp9_coeff_probs_model coef_probs_16x16[BLOCK_TYPES];
vp9_coeff_probs_model coef_probs_32x32[BLOCK_TYPES];
-#else
- vp9_coeff_probs coef_probs_4x4[BLOCK_TYPES];
- vp9_coeff_probs coef_probs_8x8[BLOCK_TYPES];
- vp9_coeff_probs coef_probs_16x16[BLOCK_TYPES];
- vp9_coeff_probs coef_probs_32x32[BLOCK_TYPES];
-#endif
vp9_prob sb_ymode_prob[VP9_I32X32_MODES - 1];
vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
@@ -231,9 +224,9 @@ typedef struct {
int optimize_coefficients;
int no_skip_block4x4_search;
int search_best_filter;
- int splitmode_breakout;
int mb16_breakout;
int static_segmentation;
+ int comp_inter_joint_search;
} SPEED_FEATURES;
enum BlockSize {
@@ -265,6 +258,14 @@ typedef struct VP9_COMP {
DECLARE_ALIGNED(16, short, uv_zbin[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, uv_round[QINDEX_RANGE][16]);
+#if CONFIG_ALPHA
+ DECLARE_ALIGNED(16, short, a_quant[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, unsigned char, a_quant_shift[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, a_zbin[QINDEX_RANGE][16]);
+ DECLARE_ALIGNED(16, short, a_round[QINDEX_RANGE][16]);
+
+ DECLARE_ALIGNED(16, short, zrun_zbin_boost_a[QINDEX_RANGE][16]);
+#endif
DECLARE_ALIGNED(16, short, zrun_zbin_boost_y[QINDEX_RANGE][16]);
DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]);
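
The quantizer tables above, including the new CONFIG_ALPHA set, are declared 16-byte aligned so SIMD quantizers can load them with aligned moves. A sketch of what such an alignment wrapper expands to on GCC/Clang (vpx's real DECLARE_ALIGNED is toolchain-conditional; MSVC uses __declspec(align(n)) instead):

/* Hypothetical single-toolchain stand-in for DECLARE_ALIGNED. */
#define DECLARE_ALIGNED_SKETCH(n, typ, val) typ val __attribute__((aligned(n)))

DECLARE_ALIGNED_SKETCH(16, short, a_quant_sketch[16]);
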
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index aea350bc4..53d8be775 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -93,7 +93,7 @@ void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coeffs,
scan = get_scan_16x16(tx_type);
break;
default:
- scan = vp9_default_zig_zag1d_32x32;
+ scan = vp9_default_scan_32x32;
break;
}
@@ -148,6 +148,9 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
int i;
int quant_val;
int quant_uv_val;
+#if CONFIG_ALPHA
+ int quant_alpha_val;
+#endif
int q;
static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12,
@@ -168,7 +171,6 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cpi->common.y_dequant[q][0] = quant_val;
cpi->zrun_zbin_boost_y[q][0] = (quant_val * zbin_boost[0]) >> 7;
-
quant_val = vp9_dc_quant(q, cpi->common.uv_dc_delta_q);
invert_quant(cpi->uv_quant[q] + 0, cpi->uv_quant_shift[q] + 0, quant_val);
cpi->uv_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
@@ -176,13 +178,26 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cpi->common.uv_dequant[q][0] = quant_val;
cpi->zrun_zbin_boost_uv[q][0] = (quant_val * zbin_boost[0]) >> 7;
+#if CONFIG_ALPHA
+ quant_val = vp9_dc_quant(q, cpi->common.a_dc_delta_q);
+ invert_quant(cpi->a_quant[q] + 0, cpi->a_quant_shift[q] + 0, quant_val);
+ cpi->a_zbin[q][0] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
+ cpi->a_round[q][0] = (qrounding_factor * quant_val) >> 7;
+ cpi->common.a_dequant[q][0] = quant_val;
+ cpi->zrun_zbin_boost_a[q][0] = (quant_val * zbin_boost[0]) >> 7;
+#endif
+
quant_val = vp9_ac_quant(q, 0);
cpi->common.y_dequant[q][1] = quant_val;
quant_uv_val = vp9_ac_quant(q, cpi->common.uv_ac_delta_q);
cpi->common.uv_dequant[q][1] = quant_uv_val;
+#if CONFIG_ALPHA
+ quant_alpha_val = vp9_ac_quant(q, cpi->common.a_ac_delta_q);
+ cpi->common.a_dequant[q][1] = quant_alpha_val;
+#endif
// all the remaining 4x4 ac values
for (i = 1; i < 16; i++) {
- int rc = vp9_default_zig_zag1d_4x4[i];
+ int rc = vp9_default_scan_4x4[i];
invert_quant(cpi->y_quant[q] + rc, cpi->y_quant_shift[q] + rc, quant_val);
cpi->y_zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
@@ -196,6 +211,16 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cpi->uv_round[q][rc] = (qrounding_factor * quant_uv_val) >> 7;
cpi->zrun_zbin_boost_uv[q][i] =
ROUND_POWER_OF_TWO(quant_uv_val * zbin_boost[i], 7);
+
+#if CONFIG_ALPHA
+ invert_quant(cpi->a_quant[q] + rc, cpi->a_quant_shift[q] + rc,
+ quant_alpha_val);
+ cpi->a_zbin[q][rc] =
+ ROUND_POWER_OF_TWO(qzbin_factor * quant_alpha_val, 7);
+ cpi->a_round[q][rc] = (qrounding_factor * quant_alpha_val) >> 7;
+ cpi->zrun_zbin_boost_a[q][i] =
+ ROUND_POWER_OF_TWO(quant_alpha_val * zbin_boost[i], 7);
+#endif
}
}
}
@@ -233,6 +258,16 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex];
}
+#if CONFIG_ALPHA
+ x->plane[3].quant = cpi->a_quant[qindex];
+ x->plane[3].quant_shift = cpi->a_quant_shift[qindex];
+ x->plane[3].zbin = cpi->a_zbin[qindex];
+ x->plane[3].round = cpi->a_round[qindex];
+ x->plane[3].zrun_zbin_boost = cpi->zrun_zbin_boost_a[qindex];
+ x->plane[3].zbin_extra = (int16_t)zbin_extra;
+ x->e_mbd.plane[3].dequant = cpi->common.a_dequant[qindex];
+#endif
+
x->skip_block = vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
/* save this macroblock QIndex for vp9_update_zbin_extra() */
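
The table setup above repeatedly rescales Q7 fixed-point products with ROUND_POWER_OF_TWO. Its conventional vpx definition (shown from memory; treat as illustrative) adds half the divisor before shifting, so the result rounds to nearest instead of truncating:

#include <stdio.h>

/* Round-to-nearest division by 2^n: add half the divisor, then shift. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

int main(void) {
  printf("%d\n", ROUND_POWER_OF_TWO(200, 7));  /* 2: (200 + 64) >> 7 */
  printf("%d\n", 200 >> 7);                    /* 1: plain truncation */
  return 0;
}
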
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 8c1ef4915..15ed8318c 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -106,11 +106,7 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
};
static void fill_token_costs(vp9_coeff_count *c,
-#if CONFIG_MODELCOEFPROB
vp9_coeff_probs_model *p,
-#else
- vp9_coeff_probs *p,
-#endif
TX_SIZE tx_size) {
int i, j, k, l;
@@ -118,15 +114,10 @@ static void fill_token_costs(vp9_coeff_count *c,
for (j = 0; j < REF_TYPES; j++)
for (k = 0; k < COEF_BANDS; k++)
for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
-#if CONFIG_MODELCOEFPROB
vp9_prob probs[ENTROPY_NODES];
- vp9_model_to_full_probs(p[i][j][k][l], i, j, probs);
+ vp9_model_to_full_probs(p[i][j][k][l], probs);
vp9_cost_tokens_skip((int *)c[i][j][k][l], probs,
vp9_coef_tree);
-#else
- vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l],
- vp9_coef_tree);
-#endif
}
}
@@ -280,11 +271,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
TX_TYPE tx_type = DCT_DCT;
const int segment_id = xd->mode_info_context->mbmi.segment_id;
-#if CONFIG_MODELCOEFPROB
vp9_prob coef_probs[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#else
- vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][ENTROPY_NODES];
-#endif
int seg_eob, default_eob;
uint8_t token_cache[1024];
const uint8_t * band_translate;
@@ -304,12 +291,8 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
get_tx_type_4x4(xd, block) : DCT_DCT;
above_ec = A[0] != 0;
left_ec = L[0] != 0;
-#if CONFIG_MODELCOEFPROB
vp9_model_to_full_probs_sb(cm->fc.coef_probs_4x4[type][ref],
- type, ref, coef_probs);
-#else
- coef_probs = cm->fc.coef_probs_4x4[type][ref];
-#endif
+ coef_probs);
seg_eob = 16;
scan = get_scan_4x4(tx_type);
band_translate = vp9_coefband_trans_4x4;
@@ -324,12 +307,8 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
above_ec = (A[0] + A[1]) != 0;
left_ec = (L[0] + L[1]) != 0;
scan = get_scan_8x8(tx_type);
-#if CONFIG_MODELCOEFPROB
vp9_model_to_full_probs_sb(cm->fc.coef_probs_8x8[type][ref],
- type, ref, coef_probs);
-#else
- coef_probs = cm->fc.coef_probs_8x8[type][ref];
-#endif
+ coef_probs);
seg_eob = 64;
band_translate = vp9_coefband_trans_8x8plus;
break;
@@ -341,12 +320,8 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
scan = get_scan_16x16(tx_type);
-#if CONFIG_MODELCOEFPROB
vp9_model_to_full_probs_sb(cm->fc.coef_probs_16x16[type][ref],
- type, ref, coef_probs);
-#else
- coef_probs = cm->fc.coef_probs_16x16[type][ref];
-#endif
+ coef_probs);
seg_eob = 256;
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3]) != 0;
@@ -354,13 +329,9 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
break;
}
case TX_32X32:
- scan = vp9_default_zig_zag1d_32x32;
-#if CONFIG_MODELCOEFPROB
+ scan = vp9_default_scan_32x32;
vp9_model_to_full_probs_sb(cm->fc.coef_probs_32x32[type][ref],
- type, ref, coef_probs);
-#else
- coef_probs = cm->fc.coef_probs_32x32[type][ref];
-#endif
+ coef_probs);
seg_eob = 1024;
above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
@@ -609,78 +580,92 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
int *bmode_costs,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
int *bestrate, int *bestratey,
- int *bestdistortion) {
- B_PREDICTION_MODE mode;
+ int *bestdistortion,
+ BLOCK_SIZE_TYPE bsize) {
+ MB_PREDICTION_MODE mode;
MACROBLOCKD *xd = &x->e_mbd;
int64_t best_rd = INT64_MAX;
int rate = 0;
int distortion;
VP9_COMMON *const cm = &cpi->common;
const int src_stride = x->plane[0].src.stride;
- uint8_t* const src =
- raster_block_offset_uint8(xd,
- BLOCK_SIZE_SB8X8,
- 0, ib,
- x->plane[0].src.buf, src_stride);
- int16_t* const src_diff =
- raster_block_offset_int16(xd,
- BLOCK_SIZE_SB8X8,
- 0, ib,
- x->plane[0].src_diff);
- int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16);
- uint8_t* const dst =
- raster_block_offset_uint8(xd,
- BLOCK_SIZE_SB8X8,
- 0, ib,
- xd->plane[0].dst.buf, xd->plane[0].dst.stride);
- ENTROPY_CONTEXT ta = *a, tempa = *a;
- ENTROPY_CONTEXT tl = *l, templ = *l;
+ uint8_t *src, *dst;
+ int16_t *src_diff, *coeff;
+
+ ENTROPY_CONTEXT ta[2], tempa[2];
+ ENTROPY_CONTEXT tl[2], templ[2];
TX_TYPE tx_type = DCT_DCT;
TX_TYPE best_tx_type = DCT_DCT;
- /*
- * The predictor buffer is a 2d buffer with a stride of 16. Create
- * a temp buffer that meets the stride requirements, but we are only
- * interested in the left 4x4 block
- * */
- DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);
+ int bw = 1 << b_width_log2(bsize);
+ int bh = 1 << b_height_log2(bsize);
+ int idx, idy, block;
+ DECLARE_ALIGNED(16, int16_t, best_dqcoeff[4][16]);
assert(ib < 4);
+ vpx_memcpy(ta, a, sizeof(ta));
+ vpx_memcpy(tl, l, sizeof(tl));
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- for (mode = B_DC_PRED; mode < LEFT4X4; mode++) {
+
+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
int64_t this_rd;
- int ratey;
+ int ratey = 0;
xd->mode_info_context->bmi[ib].as_mode.first = mode;
- rate = bmode_costs[mode];
+ if (cm->frame_type == KEY_FRAME)
+ rate = bmode_costs[mode];
+ else
+ rate = x->mbmode_cost[cm->frame_type][mode];
+ distortion = 0;
- vp9_intra4x4_predict(xd, ib,
- BLOCK_SIZE_SB8X8,
- mode, dst, xd->plane[0].dst.stride);
- vp9_subtract_block(4, 4, src_diff, 8,
- src, src_stride,
- dst, xd->plane[0].dst.stride);
+ vpx_memcpy(tempa, ta, sizeof(ta));
+ vpx_memcpy(templ, tl, sizeof(tl));
- xd->mode_info_context->bmi[ib].as_mode.first = mode;
- tx_type = get_tx_type_4x4(xd, ib);
- if (tx_type != DCT_DCT) {
- vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
- x->quantize_b_4x4(x, ib, tx_type, 16);
- } else {
- x->fwd_txm4x4(src_diff, coeff, 16);
- x->quantize_b_4x4(x, ib, tx_type, 16);
- }
+ for (idy = 0; idy < bh; ++idy) {
+ for (idx = 0; idx < bw; ++idx) {
+ block = ib + idy * 2 + idx;
+ xd->mode_info_context->bmi[block].as_mode.first = mode;
+ src = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
+ x->plane[0].src.buf, src_stride);
+ src_diff = raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, block,
+ x->plane[0].src_diff);
+ coeff = BLOCK_OFFSET(x->plane[0].coeff, block, 16);
+ dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
+ xd->plane[0].dst.buf,
+ xd->plane[0].dst.stride);
+ vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
+ dst, xd->plane[0].dst.stride);
+ vp9_subtract_block(4, 4, src_diff, 8,
+ src, src_stride,
+ dst, xd->plane[0].dst.stride);
+
+ tx_type = get_tx_type_4x4(xd, block);
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
+ x->quantize_b_4x4(x, block, tx_type, 16);
+ } else {
+ x->fwd_txm4x4(src_diff, coeff, 16);
+ x->quantize_b_4x4(x, block, tx_type, 16);
+ }
- tempa = ta;
- templ = tl;
+ ratey += cost_coeffs(cm, x, 0, block, PLANE_TYPE_Y_WITH_DC,
+ tempa + idx, templ + idy, TX_4X4, 16);
+ distortion += vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff,
+ block, 16), 16) >> 2;
- ratey = cost_coeffs(cm, x, 0, ib,
- PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16);
- rate += ratey;
- distortion = vp9_block_error(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
- 16) >> 2;
+ vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, mode,
+ dst, xd->plane[0].dst.stride);
+
+ if (best_tx_type != DCT_DCT)
+ vp9_short_iht4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
+ dst, xd->plane[0].dst.stride, best_tx_type);
+ else
+ xd->inv_txm4x4_add(BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
+ dst, xd->plane[0].dst.stride);
+ }
+ }
+ rate += ratey;
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
if (this_rd < best_rd) {
@@ -690,25 +675,37 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
best_rd = this_rd;
*best_mode = mode;
best_tx_type = tx_type;
- *a = tempa;
- *l = templ;
- vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32);
+ vpx_memcpy(a, tempa, sizeof(tempa));
+ vpx_memcpy(l, templ, sizeof(templ));
+ for (idy = 0; idy < bh; ++idy) {
+ for (idx = 0; idx < bw; ++idx) {
+ block = ib + idy * 2 + idx;
+ vpx_memcpy(best_dqcoeff[idy * 2 + idx],
+ BLOCK_OFFSET(xd->plane[0].dqcoeff, block, 16),
+ sizeof(best_dqcoeff[0]));
+ }
+ }
}
}
- xd->mode_info_context->bmi[ib].as_mode.first =
- (B_PREDICTION_MODE)(*best_mode);
-
- vp9_intra4x4_predict(xd, ib,
- BLOCK_SIZE_SB8X8,
- *best_mode,
- dst, xd->plane[0].dst.stride);
-
- // inverse transform
- if (best_tx_type != DCT_DCT) {
- vp9_short_iht4x4_add(best_dqcoeff, dst, xd->plane[0].dst.stride,
- best_tx_type);
- } else {
- xd->inv_txm4x4_add(best_dqcoeff, dst, xd->plane[0].dst.stride);
+
+ for (idy = 0; idy < bh; ++idy) {
+ for (idx = 0; idx < bw; ++idx) {
+ block = ib + idy * 2 + idx;
+ xd->mode_info_context->bmi[block].as_mode.first = *best_mode;
+ dst = raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, block,
+ xd->plane[0].dst.buf,
+ xd->plane[0].dst.stride);
+
+ vp9_intra4x4_predict(xd, block, BLOCK_SIZE_SB8X8, *best_mode,
+ dst, xd->plane[0].dst.stride);
+ // inverse transform
+ if (best_tx_type != DCT_DCT)
+ vp9_short_iht4x4_add(best_dqcoeff[idy * 2 + idx], dst,
+ xd->plane[0].dst.stride, best_tx_type);
+ else
+ xd->inv_txm4x4_add(best_dqcoeff[idy * 2 + idx], dst,
+ xd->plane[0].dst.stride);
+ }
}
return best_rd;
@@ -717,17 +714,17 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
int *Rate, int *rate_y,
int *Distortion, int64_t best_rd) {
- int i;
+ int i, j;
MACROBLOCKD *const xd = &mb->e_mbd;
-#if CONFIG_AB4X4
+ BLOCK_SIZE_TYPE bsize = xd->mode_info_context->mbmi.sb_type;
+ int bw = 1 << b_width_log2(bsize);
+ int bh = 1 << b_height_log2(bsize);
+ int idx, idy;
int cost = 0;
-#else
- int cost = mb->mbmode_cost[xd->frame_type][I4X4_PRED];
-#endif
int distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
- ENTROPY_CONTEXT t_above[2], t_left[2];
+ ENTROPY_CONTEXT t_above[4], t_left[4];
int *bmode_costs;
vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
@@ -736,31 +733,39 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
xd->mode_info_context->mbmi.mode = I4X4_PRED;
bmode_costs = mb->inter_bmode_costs;
- for (i = 0; i < 4; i++) {
- const int x_idx = i & 1, y_idx = i >> 1;
- MODE_INFO *const mic = xd->mode_info_context;
- const int mis = xd->mode_info_stride;
- B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
- int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
+ for (idy = 0; idy < 2; idy += bh) {
+ for (idx = 0; idx < 2; idx += bw) {
+ MODE_INFO *const mic = xd->mode_info_context;
+ const int mis = xd->mode_info_stride;
+ B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
+ int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry);
+ int UNINITIALIZED_IS_SAFE(d);
+ i = idy * 2 + idx;
- if (xd->frame_type == KEY_FRAME) {
- const B_PREDICTION_MODE A = above_block_mode(mic, i, mis);
- const B_PREDICTION_MODE L = left_block_mode(mic, i);
+ if (xd->frame_type == KEY_FRAME) {
+ const MB_PREDICTION_MODE A = above_block_mode(mic, i, mis);
+ const MB_PREDICTION_MODE L = (xd->left_available || idx) ?
+ left_block_mode(mic, i) : DC_PRED;
- bmode_costs = mb->bmode_costs[A][L];
- }
+ bmode_costs = mb->bmode_costs[A][L];
+ }
- total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
- t_above + x_idx, t_left + y_idx,
- &r, &ry, &d);
- cost += r;
- distortion += d;
- tot_rate_y += ry;
+ total_rd += rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
+ t_above + idx, t_left + idy,
+ &r, &ry, &d, bsize);
+ cost += r;
+ distortion += d;
+ tot_rate_y += ry;
- mic->bmi[i].as_mode.first = best_mode;
+ mic->bmi[i].as_mode.first = best_mode;
+ for (j = 1; j < bh; ++j)
+ mic->bmi[i + j * 2].as_mode.first = best_mode;
+ for (j = 1; j < bw; ++j)
+ mic->bmi[i + j].as_mode.first = best_mode;
- if (total_rd >= best_rd)
- break;
+ if (total_rd >= best_rd)
+ break;
+ }
}
if (total_rd >= best_rd)
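
An aside on the two j loops above: with AB4X4 a sub-8x8 partition is 1x1, 1x2, or 2x1 in 4x4 units, never 2x2, so a row pass plus a column pass covers every unit without overlap. The fully general form is the nested loop below (set_partition_modes is a hypothetical name):

/* Write one partition's mode into every 4x4 unit it covers; i is the
 * raster index of the top-left unit inside the 8x8 (row stride 2). */
static void set_partition_modes(int modes[4], int i, int bw, int bh, int m) {
  int x, y;
  for (y = 0; y < bh; ++y)
    for (x = 0; x < bw; ++x)
      modes[i + y * 2 + x] = m;
}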
@@ -780,18 +785,17 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t txfm_cache[NB_TXFM_MODES]) {
MB_PREDICTION_MODE mode;
MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+ MACROBLOCKD *xd = &x->e_mbd;
int this_rate, this_rate_tokenonly;
int this_distortion, s;
int64_t best_rd = INT64_MAX, this_rd;
TX_SIZE UNINITIALIZED_IS_SAFE(best_tx);
int i;
-#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8) {
x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
return best_rd;
}
-#endif
for (i = 0; i < NB_TXFM_MODES; i++)
txfm_cache[i] = INT64_MAX;
@@ -799,13 +803,20 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
/* Y Search for 32x32 intra prediction mode */
for (mode = DC_PRED; mode <= TM_PRED; mode++) {
int64_t local_txfm_cache[NB_TXFM_MODES];
+ MODE_INFO *const mic = xd->mode_info_context;
+ const int mis = xd->mode_info_stride;
+ const MB_PREDICTION_MODE A = above_block_mode(mic, 0, mis);
+ const MB_PREDICTION_MODE L = xd->left_available ?
+ left_block_mode(mic, 0) : DC_PRED;
+
+ int *bmode_costs = x->bmode_costs[A][L];
x->e_mbd.mode_info_context->mbmi.mode = mode;
vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s,
bsize, local_txfm_cache);
- this_rate = this_rate_tokenonly + x->mbmode_cost[x->e_mbd.frame_type][mode];
+ this_rate = this_rate_tokenonly + bmode_costs[mode];
this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
if (this_rd < best_rd) {
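
The rate term above now prices each intra mode against its spatial context rather than a flat per-frame table. A minimal model of the lookup, assuming ten intra modes and the same DC_PRED fallback for unavailable neighbors:

#define NUM_INTRA_MODES 10  /* assumed count, DC_PRED .. TM_PRED */

/* bits[A][L][m]: cost of signalling mode m given the above (A) and
 * left (L) neighbor modes; index 0 is assumed to be DC_PRED. */
static int intra_mode_rate(const int bits[NUM_INTRA_MODES][NUM_INTRA_MODES]
                                         [NUM_INTRA_MODES],
                           int above_mode, int left_mode,
                           int left_available, int mode) {
  const int A = above_mode;
  const int L = left_available ? left_mode : 0;
  return bits[A][L][mode];
}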
@@ -940,11 +951,9 @@ static int labels2mode(MACROBLOCK *x,
MB_MODE_INFO * mbmi = &mic->mbmi;
const int mis = xd->mode_info_stride;
int i, cost = 0, thismvcost = 0;
-#if CONFIG_AB4X4
int idx, idy;
int bw = 1 << b_width_log2(mbmi->sb_type);
int bh = 1 << b_height_log2(mbmi->sb_type);
-#endif
/* We have to be careful retrieving previously-encoded motion vectors.
Ones from this macroblock have to be pulled from the BLOCKD array
@@ -1028,7 +1037,6 @@ static int labels2mode(MACROBLOCK *x,
x->partition_info->bmi[i].mv.as_int = this_mv->as_int;
if (mbmi->second_ref_frame > 0)
x->partition_info->bmi[i].second_mv.as_int = this_second_mv->as_int;
-#if CONFIG_AB4X4
for (idy = 0; idy < bh; ++idy) {
for (idx = 0; idx < bw; ++idx) {
vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
@@ -1038,7 +1046,6 @@ static int labels2mode(MACROBLOCK *x,
sizeof(x->partition_info->bmi[i]));
}
}
-#endif
}
cost += thismvcost;
@@ -1059,9 +1066,6 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
int bwl = b_width_log2(bsize), bw = 1 << bwl;
int bhl = b_height_log2(bsize), bh = 1 << bhl;
int idx, idy;
-#if !CONFIG_AB4X4
- bw = 1, bh = 1;
-#endif
*labelyrate = 0;
*distortion = 0;
@@ -1229,18 +1233,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
ENTROPY_CONTEXT t_above[4], t_left[4];
ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
-#if !CONFIG_AB4X4
- bh = 1, bw = 1;
-#endif
-
vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
-#if CONFIG_AB4X4
v_fn_ptr = &cpi->fn_ptr[get_block_size(4 << bwl, 4 << bhl)];
-#else
- v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4];
-#endif
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
@@ -1249,19 +1245,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
-#if !CONFIG_AB4X4
- rate += vp9_cost_mv_ref(cpi, SPLITMV,
- mbmi->mb_mode_context[mbmi->ref_frame]);
- this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
- br += rate;
-#endif
other_segment_rd = this_segment_rd;
for (idy = 0; idy < 2; idy += bh) {
for (idx = 0; idx < 2; idx += bw) {
// TODO(jingning,rbultje): rewrite the rate-distortion optimization
- // loop for 4x4/4x8/8x4 block coding
-#if CONFIG_AB4X4
+ // loop for 4x4/4x8/8x4 block coding; to be replaced with the new RD loop
+#if CONFIG_AB4X4 || 1
int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
B_PREDICTION_MODE mode_selected = ZERO4X4;
@@ -1912,6 +1902,10 @@ static void setup_pred_block(const MACROBLOCKD *xd,
dst[1].buf = src->u_buffer;
dst[2].buf = src->v_buffer;
dst[1].stride = dst[2].stride = src->uv_stride;
+#if CONFIG_ALPHA
+ dst[3].buf = src->alpha_buffer;
+ dst[3].stride = src->alpha_stride;
+#endif
// TODO(jkoleszar): Make scale factors per-plane data
for (i = 0; i < MAX_MB_PLANE; i++) {
@@ -2108,157 +2102,145 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
ref_mv[1] = mbmi->ref_mvs[refs[1]][0];
if (is_comp_pred) {
-#if CONFIG_COMP_INTER_JOINT_SEARCH
- const int b_sz[BLOCK_SIZE_TYPES][2] = {
- {4, 4},
- {8, 8},
- {8, 16},
- {16, 8},
- {16, 16},
- {16, 32},
- {32, 16},
- {32, 32},
- {32, 64},
- {64, 32},
- {64, 64}
- };
-
- int ite;
- // Prediction buffer from second frame.
- uint8_t *second_pred = vpx_memalign(16, b_sz[bsize][0] *
- b_sz[bsize][1] * sizeof(uint8_t));
-
- // Do joint motion search in compound mode to get more accurate mv.
- struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
- struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
- struct buf_2d scaled_first_yv12;
- int last_besterr[2] = {INT_MAX, INT_MAX};
-
- if (scaled_ref_frame[0]) {
- int i;
-
- // Swap out the reference frame for a version that's been scaled to
- // match the resolution of the current frame, allowing the existing
- // motion search code to be used without additional modifications.
- for (i = 0; i < MAX_MB_PLANE; i++)
- backup_yv12[i] = xd->plane[i].pre[0];
-
- setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
- NULL, NULL);
- }
-
- if (scaled_ref_frame[1]) {
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; i++)
- backup_second_yv12[i] = xd->plane[i].pre[1];
-
- setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col,
- NULL, NULL);
- }
- xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
- mi_row, mi_col);
- xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
- mi_row, mi_col);
-
- scaled_first_yv12 = xd->plane[0].pre[0];
-
- // Initialize mv using single prediction mode result.
- frame_mv[NEWMV][refs[0]].as_int = single_newmv[refs[0]].as_int;
- frame_mv[NEWMV][refs[1]].as_int = single_newmv[refs[1]].as_int;
-
- // Allow joint search multiple times iteratively for each ref frame, and
- // break out the search loop if it couldn't find better mv.
- for (ite = 0; ite < 4; ite++) {
- struct buf_2d ref_yv12[2] = {xd->plane[0].pre[0],
- xd->plane[0].pre[1]};
- int bestsme = INT_MAX;
- int sadpb = x->sadperbit16;
- int_mv tmp_mv;
- int search_range = 3;
-
- int tmp_col_min = x->mv_col_min;
- int tmp_col_max = x->mv_col_max;
- int tmp_row_min = x->mv_row_min;
- int tmp_row_max = x->mv_row_max;
- int id = ite % 2;
-
- // Get pred block from second frame.
- vp9_build_inter_predictor(ref_yv12[!id].buf,
- ref_yv12[!id].stride,
- second_pred, b_sz[bsize][0],
- &frame_mv[NEWMV][refs[!id]],
- &xd->scale_factor[!id],
- b_sz[bsize][0], b_sz[bsize][1], 0,
- &xd->subpix);
-
- // Compound motion search on first ref frame.
- if (id)
- xd->plane[0].pre[0] = ref_yv12[id];
- vp9_clamp_mv_min_max(x, &ref_mv[id]);
-
- // Use mv result from single mode as mvp.
- tmp_mv.as_int = frame_mv[NEWMV][refs[id]].as_int;
-
- tmp_mv.as_mv.col >>= 3;
- tmp_mv.as_mv.row >>= 3;
-
- // Small-range full-pixel motion search
- bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
- search_range,
- &cpi->fn_ptr[block_size],
- x->nmvjointcost, x->mvcost,
- &ref_mv[id], second_pred,
- b_sz[bsize][0], b_sz[bsize][1]);
+ if (cpi->sf.comp_inter_joint_serach) {
+ int pw = 4 << b_width_log2(bsize), ph = 4 << b_height_log2(bsize);
+ int ite;
+ // Prediction buffer from second frame.
+ uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
+
+ // Do joint motion search in compound mode to get a more accurate mv.
+ struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
+ struct buf_2d backup_second_yv12[MAX_MB_PLANE] = {{0}};
+ struct buf_2d scaled_first_yv12;
+ int last_besterr[2] = {INT_MAX, INT_MAX};
+
+ if (scaled_ref_frame[0]) {
+ int i;
+
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[i] = xd->plane[i].pre[0];
+
+ setup_pre_planes(xd, scaled_ref_frame[0], NULL, mi_row, mi_col,
+ NULL, NULL);
+ }
- x->mv_col_min = tmp_col_min;
- x->mv_col_max = tmp_col_max;
- x->mv_row_min = tmp_row_min;
- x->mv_row_max = tmp_row_max;
+ if (scaled_ref_frame[1]) {
+ int i;
- if (bestsme < INT_MAX) {
- int dis; /* TODO: use dis in distortion calculation later. */
- unsigned int sse;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_second_yv12[i] = xd->plane[i].pre[1];
- bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
- &ref_mv[id],
- x->errorperbit,
- &cpi->fn_ptr[block_size],
- x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred,
- b_sz[bsize][0],
- b_sz[bsize][1]);
+ setup_pre_planes(xd, scaled_ref_frame[1], NULL, mi_row, mi_col,
+ NULL, NULL);
}
+ xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
+ mi_row, mi_col);
+ xd->scale_factor[1].set_scaled_offsets(&xd->scale_factor[1],
+ mi_row, mi_col);
+
+ scaled_first_yv12 = xd->plane[0].pre[0];
+
+ // Initialize mv using single prediction mode result.
+ frame_mv[NEWMV][refs[0]].as_int = single_newmv[refs[0]].as_int;
+ frame_mv[NEWMV][refs[1]].as_int = single_newmv[refs[1]].as_int;
+
+ // Iterate the joint search, alternating between the two ref frames,
+ // and break out of the loop as soon as a pass finds no better mv.
+ for (ite = 0; ite < 4; ite++) {
+ struct buf_2d ref_yv12[2];
+ int bestsme = INT_MAX;
+ int sadpb = x->sadperbit16;
+ int_mv tmp_mv;
+ int search_range = 3;
+
+ int tmp_col_min = x->mv_col_min;
+ int tmp_col_max = x->mv_col_max;
+ int tmp_row_min = x->mv_row_min;
+ int tmp_row_max = x->mv_row_max;
+ int id = ite % 2;
+
+ // Initialized here because of a compiler problem in Visual Studio.
+ ref_yv12[0] = xd->plane[0].pre[0];
+ ref_yv12[1] = xd->plane[0].pre[1];
+
+ // Get pred block from second frame.
+ vp9_build_inter_predictor(ref_yv12[!id].buf,
+ ref_yv12[!id].stride,
+ second_pred, pw,
+ &frame_mv[NEWMV][refs[!id]],
+ &xd->scale_factor[!id],
+ pw, ph, 0,
+ &xd->subpix);
+
+ // Compound motion search on first ref frame.
+ if (id)
+ xd->plane[0].pre[0] = ref_yv12[id];
+ vp9_clamp_mv_min_max(x, &ref_mv[id]);
+
+ // Use mv result from single mode as mvp.
+ tmp_mv.as_int = frame_mv[NEWMV][refs[id]].as_int;
+
+ tmp_mv.as_mv.col >>= 3;
+ tmp_mv.as_mv.row >>= 3;
+
+ // Small-range full-pixel motion search
+ bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
+ search_range,
+ &cpi->fn_ptr[block_size],
+ x->nmvjointcost, x->mvcost,
+ &ref_mv[id], second_pred,
+ pw, ph);
+
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+
+ if (bestsme < INT_MAX) {
+ int dis; /* TODO: use dis in distortion calculation later. */
+ unsigned int sse;
+
+ bestsme = vp9_find_best_sub_pixel_comp(x, &tmp_mv,
+ &ref_mv[id],
+ x->errorperbit,
+ &cpi->fn_ptr[block_size],
+ x->nmvjointcost, x->mvcost,
+ &dis, &sse, second_pred,
+ pw, ph);
+ }
- if (id)
- xd->plane[0].pre[0] = scaled_first_yv12;
+ if (id)
+ xd->plane[0].pre[0] = scaled_first_yv12;
- if (bestsme < last_besterr[id]) {
+ if (bestsme < last_besterr[id]) {
frame_mv[NEWMV][refs[id]].as_int =
- xd->mode_info_context->bmi[0].as_mv[1].as_int = tmp_mv.as_int;
- last_besterr[id] = bestsme;
- } else {
- break;
+ xd->mode_info_context->bmi[0].as_mv[1].as_int = tmp_mv.as_int;
+ last_besterr[id] = bestsme;
+ } else {
+ break;
+ }
}
- }
- // restore the predictor
- if (scaled_ref_frame[0]) {
- int i;
+ // restore the predictor
+ if (scaled_ref_frame[0]) {
+ int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[0] = backup_yv12[i];
- }
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[0] = backup_yv12[i];
+ }
- if (scaled_ref_frame[1]) {
- int i;
+ if (scaled_ref_frame[1]) {
+ int i;
- for (i = 0; i < MAX_MB_PLANE; i++)
- xd->plane[i].pre[1] = backup_second_yv12[i];
- }
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[1] = backup_second_yv12[i];
+ }
- vpx_free(second_pred);
-#endif // CONFIG_COMP_INTER_JOINT_SEARCH
+ vpx_free(second_pred);
+ }
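
Condensed, the joint search alternates between the two reference frames: hold one mv fixed, render its prediction into second_pred, refine the other mv against the compound predictor, and stop after four passes or at the first pass that fails to improve. A sketch under simplified signatures; build_pred, refine_fullpel and refine_subpel are hypothetical stand-ins for vp9_build_inter_predictor, vp9_refining_search_8p_c and vp9_find_best_sub_pixel_comp:

#include <limits.h>
#include <stdint.h>

typedef struct { int row, col; } mv_t;  /* simplified int_mv */

/* Stand-ins; the real calls also carry the MACROBLOCK(D) state, mv
 * costs, clamping ranges and scale factors omitted here. */
static void build_pred(const mv_t *mv, uint8_t *pred, int pw, int ph) {
  (void)mv; (void)pred; (void)pw; (void)ph;
}
static int refine_fullpel(mv_t *mv, const uint8_t *second_pred,
                          int pw, int ph) {
  (void)mv; (void)second_pred; (void)pw; (void)ph; return INT_MAX;
}
static int refine_subpel(mv_t *mv, const uint8_t *second_pred,
                         int pw, int ph) {
  (void)mv; (void)second_pred; (void)pw; (void)ph; return INT_MAX;
}

static void joint_search(mv_t mv[2], uint8_t *second_pred, int pw, int ph) {
  int last_err[2] = { INT_MAX, INT_MAX };
  int ite;
  for (ite = 0; ite < 4; ++ite) {
    const int id = ite & 1;               /* the ref refined this pass */
    int err;
    build_pred(&mv[!id], second_pred, pw, ph);   /* fixed ref's block */
    err = refine_fullpel(&mv[id], second_pred, pw, ph);
    if (err < INT_MAX)
      err = refine_subpel(&mv[id], second_pred, pw, ph);
    if (err < last_err[id])
      last_err[id] = err;                 /* keep the improved mv */
    else
      break;                              /* no gain: stop iterating */
  }
}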
if (frame_mv[NEWMV][refs[0]].as_int == INVALID_MV ||
frame_mv[NEWMV][refs[1]].as_int == INVALID_MV)
@@ -2577,11 +2559,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
&dist_uv, &uv_skip,
(bsize < BLOCK_SIZE_SB8X8) ? BLOCK_SIZE_SB8X8 :
bsize);
-#if CONFIG_AB4X4
if (bsize < BLOCK_SIZE_SB8X8)
-#else
- if (bsize == BLOCK_SIZE_SB8X8)
-#endif
err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y,
&rate4x4_y_tokenonly,
&dist4x4_y, err);
@@ -2593,11 +2571,7 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
memset(ctx->txfm_rd_diff, 0, sizeof(ctx->txfm_rd_diff));
xd->mode_info_context->mbmi.mode = mode;
xd->mode_info_context->mbmi.txfm_size = txfm_size;
-#if CONFIG_AB4X4
} else if (bsize < BLOCK_SIZE_SB8X8 && err4x4 < err) {
-#else
- } else if (bsize == BLOCK_SIZE_SB8X8 && err4x4 < err) {
-#endif
*returnrate = rate4x4_y + rate_uv +
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
*returndist = dist4x4_y + (dist_uv >> 2);
@@ -2762,17 +2736,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
txfm_cache[i] = INT64_MAX;
// Test best rd so far against threshold for trying this mode.
-#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8 &&
(best_rd < cpi->rd_threshes[mode_index] ||
cpi->rd_threshes[mode_index] == INT_MAX))
continue;
-#else
- if (best_rd <= cpi->rd_threshes[mode_index] ||
- cpi->rd_threshes[mode_index] == INT_MAX) {
- continue;
- }
-#endif
x->skip = 0;
this_mode = vp9_mode_order[mode_index].mode;
@@ -2783,11 +2750,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
-#if CONFIG_AB4X4
if (cpi->speed > 0 && bsize >= BLOCK_SIZE_SB8X8) {
-#else
- if (cpi->speed > 0) {
-#endif
if (!(ref_frame_mask & (1 << ref_frame))) {
continue;
}
@@ -2833,18 +2796,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->interp_filter = cm->mcomp_filter_type;
vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
-#if CONFIG_AB4X4
if (bsize >= BLOCK_SIZE_SB8X8 &&
(this_mode == I4X4_PRED || this_mode == SPLITMV))
continue;
if (bsize < BLOCK_SIZE_SB8X8 &&
!(this_mode == I4X4_PRED || this_mode == SPLITMV))
continue;
-#else
- if (bsize != BLOCK_SIZE_SB8X8 &&
- (this_mode == I4X4_PRED || this_mode == SPLITMV))
- continue;
-#endif
if (comp_pred) {
if (ref_frame == ALTREF_FRAME) {
@@ -2919,11 +2876,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += dist_uv[TX_4X4];
distortion_uv = dist_uv[TX_4X4];
mbmi->uv_mode = mode_uv[TX_4X4];
-#if CONFIG_AB4X4
txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
for (i = 0; i < NB_TXFM_MODES; ++i)
txfm_cache[i] = txfm_cache[ONLY_4X4];
-#endif
} else if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
vp9_build_intra_predictors_sby_s(xd, bsize);
@@ -3057,11 +3012,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
-#if CONFIG_AB4X4
txfm_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
for (i = 0; i < NB_TXFM_MODES; ++i)
txfm_cache[i] = txfm_cache[ONLY_4X4];
-#endif
if (!mode_excluded) {
if (is_comp_pred)
@@ -3117,11 +3070,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Is Mb level skip allowed (i.e. not coded at segment level).
mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
-#if CONFIG_AB4X4
if (skippable && bsize >= BLOCK_SIZE_SB8X8) {
-#else
- if (skippable) {
-#endif
// Back out the coefficient coding costs
rate2 -= (rate_y + rate_uv);
// for best_yrd calculation
@@ -3302,13 +3251,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
}
-#if CONFIG_AB4X4
if (best_rd == INT64_MAX && bsize < BLOCK_SIZE_SB8X8) {
*returnrate = INT_MAX;
*returndistortion = INT_MAX;
return best_rd;
}
-#endif
assert((cm->mcomp_filter_type == SWITCHABLE) ||
(cm->mcomp_filter_type == best_mbmode.interp_filter) ||
@@ -3341,10 +3288,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
cpi->is_src_frame_alt_ref &&
(cpi->oxcf.arnr_max_frames == 0) &&
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)
-#if CONFIG_AB4X4
- && bsize >= BLOCK_SIZE_SB8X8
-#endif
- ) {
+ && bsize >= BLOCK_SIZE_SB8X8) {
mbmi->mode = ZEROMV;
mbmi->ref_frame = ALTREF_FRAME;
mbmi->second_ref_frame = NONE;
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c
index 994828f20..6b1ba4964 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -593,6 +593,37 @@ void vp9_sad8x4x4d_c(const uint8_t *src_ptr,
ref_ptr[3], ref_stride, 0x7fffffff);
}
+void vp9_sad8x4x8_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad8x4(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
+}
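
The unrolled body above is one 8x4 SAD evaluated at eight horizontally adjacent reference columns, presumably consumed through the x8 multi-SAD slot of the block's vp9_variance_fn_ptr_t. In loop form (vp9_sad8x4's five-argument shape is taken from the calls above):

/* Loop equivalent of vp9_sad8x4x8_c; vp9_sad4x8x8_c below is the same
 * pattern with the 4x8 kernel. */
static void sad8x4x8_loop(const uint8_t *src_ptr, int src_stride,
                          const uint8_t *ref_ptr, int ref_stride,
                          uint32_t *sad_array) {
  int i;
  for (i = 0; i < 8; ++i)
    sad_array[i] = vp9_sad8x4(src_ptr, src_stride, ref_ptr + i, ref_stride,
                              0x7fffffff);
}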
+
void vp9_sad4x8x4d_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t* const ref_ptr[],
@@ -608,6 +639,37 @@ void vp9_sad4x8x4d_c(const uint8_t *src_ptr,
ref_ptr[3], ref_stride, 0x7fffffff);
}
+void vp9_sad4x8x8_c(const uint8_t *src_ptr,
+ int src_stride,
+ const uint8_t *ref_ptr,
+ int ref_stride,
+ uint32_t *sad_array) {
+ sad_array[0] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr, ref_stride,
+ 0x7fffffff);
+ sad_array[1] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr + 1, ref_stride,
+ 0x7fffffff);
+ sad_array[2] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr + 2, ref_stride,
+ 0x7fffffff);
+ sad_array[3] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr + 3, ref_stride,
+ 0x7fffffff);
+ sad_array[4] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr + 4, ref_stride,
+ 0x7fffffff);
+ sad_array[5] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr + 5, ref_stride,
+ 0x7fffffff);
+ sad_array[6] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr + 6, ref_stride,
+ 0x7fffffff);
+ sad_array[7] = vp9_sad4x8(src_ptr, src_stride,
+ ref_ptr + 7, ref_stride,
+ 0x7fffffff);
+}
+
void vp9_sad4x4x4d_c(const uint8_t *src_ptr,
int src_stride,
const uint8_t* const ref_ptr[],
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 30143d77d..47792fcc2 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -205,9 +205,11 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
DECLARE_ALIGNED_ARRAY(16, uint8_t, predictor, 16 * 16 + 8 * 8 + 8 * 8);
// Save input state
- uint8_t *y_buffer = mbd->plane[0].pre[0].buf;
- uint8_t *u_buffer = mbd->plane[1].pre[0].buf;
- uint8_t *v_buffer = mbd->plane[2].pre[0].buf;
+ uint8_t* input_buffer[MAX_MB_PLANE];
+ int i;
+
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ input_buffer[i] = mbd->plane[i].pre[0].buf;
for (mb_row = 0; mb_row < mb_rows; mb_row++) {
#if ALT_REF_MC_ENABLED
@@ -352,9 +354,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
}
// Restore input state
- mbd->plane[0].pre[0].buf = y_buffer;
- mbd->plane[1].pre[0].buf = u_buffer;
- mbd->plane[2].pre[0].buf = v_buffer;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ mbd->plane[i].pre[0].buf = input_buffer[i];
}
void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
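
Folding the per-plane pointers into an array turns the Y/U/V save and restore into one loop that also covers an alpha plane when MAX_MB_PLANE allows for it. The pattern, as a standalone sketch with minimal hypothetical structs:

#include <stdint.h>

#define MAX_MB_PLANE 4  /* assumed bound with CONFIG_ALPHA */

struct buf_sketch { uint8_t *buf; };
struct plane_sketch { struct buf_sketch pre[2]; };
struct mbd_sketch { struct plane_sketch plane[MAX_MB_PLANE]; };

/* Save every plane's pre-buffer pointer, run a body that may repoint
 * them per macroblock, then restore the originals. */
static void with_saved_pre_buffers(struct mbd_sketch *xd,
                                   void (*body)(struct mbd_sketch *)) {
  uint8_t *saved[MAX_MB_PLANE];
  int i;
  for (i = 0; i < MAX_MB_PLANE; i++)
    saved[i] = xd->plane[i].pre[0].buf;
  body(xd);
  for (i = 0; i < MAX_MB_PLANE; i++)
    xd->plane[i].pre[0].buf = saved[i];
}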
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 9ed16ffc3..08efc84d4 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -119,12 +119,8 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
TOKENEXTRA *t = *tp; /* store tokens starting here */
const int eob = xd->plane[plane].eobs[block];
const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
-#if CONFIG_AB4X4
const BLOCK_SIZE_TYPE sb_type = (mbmi->sb_type < BLOCK_SIZE_SB8X8) ?
BLOCK_SIZE_SB8X8 : mbmi->sb_type;
-#else
- const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
-#endif
const int bwl = b_width_log2(sb_type);
const int off = block >> (2 * tx_size);
const int mod = bwl - tx_size - xd->plane[plane].subsampling_x;
@@ -136,11 +132,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
const int segment_id = mbmi->segment_id;
const int *scan, *nb;
vp9_coeff_count *counts;
-#if CONFIG_MODELCOEFPROB
vp9_coeff_probs_model *coef_probs;
-#else
- vp9_coeff_probs *coef_probs;
-#endif
const int ref = mbmi->ref_frame != INTRA_FRAME;
ENTROPY_CONTEXT above_ec, left_ec;
uint8_t token_cache[1024];
@@ -194,7 +186,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
above_ec = (A[0] + A[1] + A[2] + A[3] + A[4] + A[5] + A[6] + A[7]) != 0;
left_ec = (L[0] + L[1] + L[2] + L[3] + L[4] + L[5] + L[6] + L[7]) != 0;
seg_eob = 1024;
- scan = vp9_default_zig_zag1d_32x32;
+ scan = vp9_default_scan_32x32;
counts = cpi->coef_counts_32x32;
coef_probs = cpi->common.fc.coef_probs_32x32;
band_translate = vp9_coefband_trans_8x8plus;
@@ -228,10 +220,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
t->token = token;
t->context_tree = coef_probs[type][ref][band][pt];
-#if CONFIG_MODELCOEFPROB
- t->block_type = type;
- t->ref_type = ref;
-#endif
t->skip_eob_node = (c > 0) && (token_cache[scan[c - 1]] == 0);
assert(vp9_coef_encodings[t->token].len - t->skip_eob_node > 0);
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index 2a56da8d1..08236c429 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -26,10 +26,6 @@ typedef struct {
int16_t extra;
uint8_t token;
uint8_t skip_eob_node;
-#if CONFIG_MODELCOEFPROB
- uint8_t block_type;
- uint8_t ref_type;
-#endif
} TOKENEXTRA;
typedef int64_t vp9_coeff_accum[REF_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 306476b01..aaa43ef82 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -86,7 +86,6 @@ typedef struct vp9_variance_vtable {
vp9_sad_multi_d_fn_t sdx4df;
} vp9_variance_fn_ptr_t;
-// #if CONFIG_COMP_INTER_JOINT_SEARCH
static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int weight,
int height, uint8_t *ref, int ref_stride) {
int i, j;
@@ -102,5 +101,4 @@ static void comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int weight,
ref += ref_stride;
}
}
-// #endif // CONFIG_COMP_INTER_JOINT_SEARCH
#endif // VP9_ENCODER_VP9_VARIANCE_H_
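
comp_avg_pred builds the compound predictor that the joint search and the comp sub-pel search score against. Its inner loop is elided by the hunk above; presumably it is the usual per-pixel rounded average, sketched here (note the header's `weight` parameter plays the role of the block width):

/* Assumed body: comp = (pred + ref + 1) >> 1 per pixel; comp_pred and
 * pred are packed at the block width, ref uses its own stride. */
static void comp_avg_pred_sketch(uint8_t *comp_pred, const uint8_t *pred,
                                 int width, int height,
                                 const uint8_t *ref, int ref_stride) {
  int i, j;
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j)
      comp_pred[j] = (uint8_t)((pred[j] + ref[j] + 1) >> 1);
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}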
diff --git a/vp9/encoder/x86/vp9_variance_sse2.c b/vp9/encoder/x86/vp9_variance_sse2.c
index fc363b6b0..67ca9257c 100644
--- a/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/vp9/encoder/x86/vp9_variance_sse2.c
@@ -139,7 +139,37 @@ void vp9_half_vert_variance16x_h_sse2
DECLARE_ALIGNED(16, extern const short, vp9_bilinear_filters_mmx[16][8]);
-unsigned int vp9_variance4x4_wmt(
+typedef unsigned int (*get_var_sse2) (
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *SSE,
+ int *Sum
+);
+
+static void variance_sse2(const unsigned char *src_ptr, int source_stride,
+ const unsigned char *ref_ptr, int recon_stride,
+ int w, int h, unsigned int *sse, int *sum,
+ get_var_sse2 var_fn, int block_size) {
+ unsigned int sse0;
+ int sum0;
+ int i, j;
+
+ *sse = 0;
+ *sum = 0;
+
+ for (i = 0; i < h; i += block_size) {
+ for (j = 0; j < w; j += block_size) {
+ var_fn(src_ptr + source_stride * i + j, source_stride,
+ ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0);
+ *sse += sse0;
+ *sum += sum0;
+ }
+ }
+}
+
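
Every wrapper below feeds the accumulated sums through the identity variance = SSE - Sum^2 / N with N = w * h, so the final shift is log2(N): 4 for 4x4 up through 12 for 64x64. From 32x32 upward Sum^2 no longer fits in 32 bits (|Sum| can already reach 255 * 1024 at 32x32), hence the int64_t casts there, while vp9_mse16x16 skips the mean correction and returns the raw SSE. The common form:

#include <stdint.h>

/* variance = SSE - Sum^2 / N, with N = w * h and shift = log2(N). */
static unsigned int var_from_sums(unsigned int sse, int sum, int log2_n) {
  return sse - (unsigned int)(((int64_t)sum * sum) >> log2_n);
}
/* e.g. 16x16: var_from_sums(sse, sum, 8); 64x64: var_from_sums(sse, sum, 12). */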
+unsigned int vp9_variance4x4_sse2(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
@@ -148,13 +178,41 @@ unsigned int vp9_variance4x4_wmt(
unsigned int var;
int avg;
- vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
+ &var, &avg, vp9_get4x4var_mmx, 4);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 4));
+}
+
+unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
+ &var, &avg, vp9_get4x4var_mmx, 4);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 5));
+}
+
+unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
+ &var, &avg, vp9_get4x4var_mmx, 4);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 5));
}
-unsigned int vp9_variance8x8_wmt
+unsigned int vp9_variance8x8_sse2
(
const unsigned char *src_ptr,
int source_stride,
@@ -164,83 +222,157 @@ unsigned int vp9_variance8x8_wmt
unsigned int var;
int avg;
- vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg);
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
+ &var, &avg, vp9_get8x8var_sse2, 8);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 6));
-
}
-
-unsigned int vp9_variance16x16_wmt
+unsigned int vp9_variance16x8_sse2
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
- unsigned int sse0;
- int sum0;
-
+ unsigned int var;
+ int avg;
- vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
- *sse = sse0;
- return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
+ &var, &avg, vp9_get8x8var_sse2, 8);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 7));
}
-unsigned int vp9_mse16x16_wmt(
+unsigned int vp9_variance8x16_sse2
+(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
+ unsigned int var;
+ int avg;
- unsigned int sse0;
- int sum0;
- vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
- *sse = sse0;
- return sse0;
-
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
+ &var, &avg, vp9_get8x8var_sse2, 8);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 7));
}
-
-unsigned int vp9_variance16x8_wmt
+unsigned int vp9_variance16x16_sse2
(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
-
- vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
- vp9_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
+ unsigned int var;
+ int avg;
- var = sse0 + sse1;
- avg = sum0 + sum1;
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
+ &var, &avg, vp9_get16x16var_sse2, 16);
*sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
-
+ return (var - (((unsigned int)avg * avg) >> 8));
}
-unsigned int vp9_variance8x16_wmt
-(
+unsigned int vp9_mse16x16_wmt(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
- vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0);
- vp9_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1);
+ unsigned int sse0;
+ int sum0;
+ vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0,
+ &sum0);
+ *sse = sse0;
+ return sse0;
+}
- var = sse0 + sse1;
- avg = sum0 + sum1;
+unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
+ &var, &avg, vp9_get16x16var_sse2, 16);
*sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
+ return (var - (((int64_t)avg * avg) >> 10));
+}
+
+unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
+ &var, &avg, vp9_get16x16var_sse2, 16);
+ *sse = var;
+ return (var - (((int64_t)avg * avg) >> 9));
+}
+unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
+ &var, &avg, vp9_get16x16var_sse2, 16);
+ *sse = var;
+ return (var - (((int64_t)avg * avg) >> 9));
+}
+
+unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
+ &var, &avg, vp9_get16x16var_sse2, 16);
+ *sse = var;
+ return (var - (((int64_t)avg * avg) >> 12));
+}
+
+unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
+ &var, &avg, vp9_get16x16var_sse2, 16);
+ *sse = var;
+ return (var - (((int64_t)avg * avg) >> 11));
+}
+
+unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
+ &var, &avg, vp9_get16x16var_sse2, 16);
+ *sse = var;
+ return (var - (((int64_t)avg * avg) >> 11));
}
unsigned int vp9_sub_pixel_variance4x4_wmt
diff --git a/vp9/encoder/x86/vp9_variance_ssse3.c b/vp9/encoder/x86/vp9_variance_ssse3.c
index f95a5423c..882acad78 100644
--- a/vp9/encoder/x86/vp9_variance_ssse3.c
+++ b/vp9/encoder/x86/vp9_variance_ssse3.c
@@ -15,15 +15,6 @@
#define HALFNDX 8
-extern unsigned int vp9_get16x16var_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
extern void vp9_half_horiz_vert_variance16x_h_sse2
(
const unsigned char *ref_ptr,