summary | refs | log | tree | commit | diff
path: root/vp9/encoder
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- vp9/encoder/vp9_bitstream.c | 452
-rw-r--r-- vp9/encoder/vp9_block.h | 12
-rw-r--r-- vp9/encoder/vp9_encodeframe.c | 800
-rw-r--r-- vp9/encoder/vp9_encodeintra.c | 214
-rw-r--r-- vp9/encoder/vp9_encodeintra.h | 4
-rw-r--r-- vp9/encoder/vp9_encodemb.c | 641
-rw-r--r-- vp9/encoder/vp9_encodemb.h | 54
-rw-r--r-- vp9/encoder/vp9_firstpass.c | 75
-rw-r--r-- vp9/encoder/vp9_mbgraph.c | 11
-rw-r--r-- vp9/encoder/vp9_modecosts.c | 2
-rw-r--r-- vp9/encoder/vp9_onyx_if.c | 74
-rw-r--r-- vp9/encoder/vp9_onyx_int.h | 26
-rw-r--r-- vp9/encoder/vp9_quantize.c | 228
-rw-r--r-- vp9/encoder/vp9_quantize.h | 19
-rw-r--r-- vp9/encoder/vp9_ratectrl.c | 4
-rw-r--r-- vp9/encoder/vp9_rdopt.c | 2791
-rw-r--r-- vp9/encoder/vp9_rdopt.h | 7
-rw-r--r-- vp9/encoder/vp9_segmentation.c | 122
-rw-r--r-- vp9/encoder/vp9_temporal_filter.c | 2
-rw-r--r-- vp9/encoder/vp9_tokenize.c | 11
-rw-r--r-- vp9/encoder/vp9_variance_c.c | 26
-rw-r--r-- vp9/encoder/x86/vp9_quantize_mmx.asm | 286
-rw-r--r-- vp9/encoder/x86/vp9_quantize_sse2.asm | 379
-rw-r--r-- vp9/encoder/x86/vp9_quantize_sse4.asm | 253
-rw-r--r-- vp9/encoder/x86/vp9_quantize_ssse3.asm | 137
-rw-r--r-- vp9/encoder/x86/vp9_quantize_x86.h | 48
26 files changed, 1537 insertions, 5141 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 3ab67cd8c..6624f07eb 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -281,10 +281,6 @@ static void sb_kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) {
write_token(bc, vp9_uv_mode_tree, p, vp9_sb_kf_ymode_encodings + m);
}
-static void write_i8x8_mode(vp9_writer *bc, int m, const vp9_prob *p) {
- write_token(bc, vp9_i8x8_mode_tree, p, vp9_i8x8_mode_encodings + m);
-}
-
static void write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) {
write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m);
}
@@ -302,10 +298,6 @@ static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) {
write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m);
}
-static void write_split(vp9_writer *bc, int x, const vp9_prob *p) {
- write_token(bc, vp9_mbsplit_tree, p, vp9_mbsplit_encodings + x);
-}
-
static int prob_update_savings(const unsigned int *ct,
const vp9_prob oldp, const vp9_prob newp,
const vp9_prob upd) {
@@ -508,60 +500,9 @@ static void write_nmv(VP9_COMP *cpi, vp9_writer *bc,
// It should only be called if a segment map update is indicated.
static void write_mb_segid(vp9_writer *bc,
const MB_MODE_INFO *mi, const MACROBLOCKD *xd) {
- // Encode the MB segment id.
- int seg_id = mi->segment_id;
-
- if (xd->segmentation_enabled && xd->update_mb_segmentation_map) {
- switch (seg_id) {
- case 0:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[2]);
- break;
- case 1:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[2]);
- break;
- case 2:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[3]);
- break;
- case 3:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[3]);
- break;
- case 4:
- vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[4]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[5]);
- break;
- case 5:
- vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[4]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[5]);
- break;
- case 6:
- vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[4]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[6]);
- break;
- case 7:
- vp9_write(bc, 1, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[4]);
- vp9_write(bc, 1, xd->mb_segment_tree_probs[6]);
- break;
-
- // TRAP.. This should not happen
- default:
- vp9_write(bc, 0, xd->mb_segment_tree_probs[0]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[1]);
- vp9_write(bc, 0, xd->mb_segment_tree_probs[2]);
- break;
- }
- }
+ if (xd->segmentation_enabled && xd->update_mb_segmentation_map)
+ treed_write(bc, vp9_segment_tree, xd->mb_segment_tree_probs,
+ mi->segment_id, 3);
}
// This function encodes the reference frame
@@ -718,7 +659,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
active_section = 6;
#endif
- if (m->mbmi.sb_type > BLOCK_SIZE_MB16X16)
+ if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob);
else
write_ymode(bc, mode, pc->fc.ymode_prob);
@@ -728,21 +669,10 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
do {
write_bmode(bc, m->bmi[j].as_mode.first,
pc->fc.bmode_prob);
- } while (++j < 16);
- }
- if (mode == I8X8_PRED) {
- write_i8x8_mode(bc, m->bmi[0].as_mode.first,
- pc->fc.i8x8_mode_prob);
- write_i8x8_mode(bc, m->bmi[2].as_mode.first,
- pc->fc.i8x8_mode_prob);
- write_i8x8_mode(bc, m->bmi[8].as_mode.first,
- pc->fc.i8x8_mode_prob);
- write_i8x8_mode(bc, m->bmi[10].as_mode.first,
- pc->fc.i8x8_mode_prob);
- } else {
- write_uv_mode(bc, mi->uv_mode,
- pc->fc.uv_mode_prob[mode]);
+ } while (++j < 4);
}
+ write_uv_mode(bc, mi->uv_mode,
+ pc->fc.uv_mode_prob[mode]);
} else {
vp9_prob mv_ref_p[VP9_MVREFS - 1];
@@ -754,7 +684,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
// If segment skip is not enabled code the mode.
if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) {
- if (mi->sb_type > BLOCK_SIZE_MB16X16) {
+ if (mi->sb_type > BLOCK_SIZE_SB8X8) {
write_sb_mv_ref(bc, mode, mv_ref_p);
} else {
write_mv_ref(bc, mode, mv_ref_p);
@@ -824,26 +754,16 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
++count_mb_seg[mi->partitioning];
#endif
- write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob);
- cpi->mbsplit_count[mi->partitioning]++;
-
do {
B_PREDICTION_MODE blockmode;
int_mv blockmv;
- const int *const L = vp9_mbsplits[mi->partitioning];
int k = -1; /* first block in subset j */
int mv_contz;
int_mv leftmv, abovemv;
blockmode = cpi->mb.partition_info->bmi[j].mode;
blockmv = cpi->mb.partition_info->bmi[j].mv;
-#if CONFIG_DEBUG
- while (j != L[++k])
- if (k >= 16)
- assert(0);
-#else
- while (j != L[++k]);
-#endif
+ k = j;
leftmv.as_int = left_block_mv(xd, m, k);
abovemv.as_int = above_block_mv(m, k, mis);
mv_contz = vp9_mv_cont(&leftmv, &abovemv);
@@ -875,16 +795,15 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m,
}
}
- if (((rf == INTRA_FRAME && mode <= I8X8_PRED) ||
- (rf != INTRA_FRAME && !(mode == SPLITMV &&
- mi->partitioning == PARTITIONING_4X4))) &&
+ if (((rf == INTRA_FRAME && mode != I4X4_PRED) ||
+ (rf != INTRA_FRAME && mode != SPLITMV)) &&
pc->txfm_mode == TX_MODE_SELECT &&
- !(skip_coeff || vp9_segfeature_active(xd, segment_id,
- SEG_LVL_SKIP))) {
+ !(skip_coeff || vp9_segfeature_active(xd, segment_id,
+ SEG_LVL_SKIP))) {
TX_SIZE sz = mi->txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]);
- if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) {
+ if (mi->sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) {
vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]);
if (mi->sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8)
vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]);
@@ -912,7 +831,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
vp9_write(bc, skip_coeff, vp9_get_pred_prob(c, xd, PRED_MBSKIP));
}
- if (m->mbmi.sb_type > BLOCK_SIZE_MB16X16)
+ if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8)
sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
else
kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]);
@@ -921,35 +840,26 @@ static void write_mb_modes_kf(const VP9_COMP *cpi,
int i = 0;
do {
const B_PREDICTION_MODE a = above_block_mode(m, i, mis);
- const B_PREDICTION_MODE l = (xd->left_available || (i & 3)) ?
+ const B_PREDICTION_MODE l = (xd->left_available ||
+ (i & 1)) ?
left_block_mode(m, i) : B_DC_PRED;
const int bm = m->bmi[i].as_mode.first;
#ifdef ENTROPY_STATS
++intra_mode_stats [A] [L] [bm];
#endif
-
write_kf_bmode(bc, bm, c->kf_bmode_prob[a][l]);
- } while (++i < 16);
+ } while (++i < 4);
}
- if (ym == I8X8_PRED) {
- write_i8x8_mode(bc, m->bmi[0].as_mode.first, c->fc.i8x8_mode_prob);
- // printf(" mode: %d\n", m->bmi[0].as_mode.first); fflush(stdout);
- write_i8x8_mode(bc, m->bmi[2].as_mode.first, c->fc.i8x8_mode_prob);
- // printf(" mode: %d\n", m->bmi[2].as_mode.first); fflush(stdout);
- write_i8x8_mode(bc, m->bmi[8].as_mode.first, c->fc.i8x8_mode_prob);
- // printf(" mode: %d\n", m->bmi[8].as_mode.first); fflush(stdout);
- write_i8x8_mode(bc, m->bmi[10].as_mode.first, c->fc.i8x8_mode_prob);
- // printf(" mode: %d\n", m->bmi[10].as_mode.first); fflush(stdout);
- } else
- write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
- if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT &&
+ write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
+
+ if (ym != I4X4_PRED && c->txfm_mode == TX_MODE_SELECT &&
!(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
TX_SIZE sz = m->mbmi.txfm_size;
// FIXME(rbultje) code ternary symbol once all experiments are merged
vp9_write(bc, sz != TX_4X4, c->prob_tx[0]);
- if (sz != TX_4X4 && ym <= TM_PRED) {
+ if (m->mbmi.sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) {
vp9_write(bc, sz != TX_8X8, c->prob_tx[1]);
if (m->mbmi.sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8)
vp9_write(bc, sz != TX_16X16, c->prob_tx[2]);
@@ -1162,45 +1072,34 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
else
assert(0);
- if (bsize > BLOCK_SIZE_MB16X16) {
+ if (bsize > BLOCK_SIZE_SB8X8) {
int pl;
xd->left_seg_context =
- cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
- xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8);
+ cm->left_seg_context + ((mi_row >> 1) & 3);
+ xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
pl = partition_plane_context(xd, bsize);
// encode the partition information
write_token(bc, vp9_partition_tree, cm->fc.partition_prob[pl],
vp9_partition_encodings + partition);
}
+ subsize = get_subsize(bsize, partition);
+
switch (partition) {
case PARTITION_NONE:
- subsize = bsize;
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
break;
case PARTITION_HORZ:
- subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB64X32 :
- BLOCK_SIZE_SB32X16;
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
if ((mi_row + bh) < cm->mi_rows)
write_modes_b(cpi, m + bh * mis, bc, tok, tok_end, mi_row + bh, mi_col);
break;
case PARTITION_VERT:
- subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X64 :
- BLOCK_SIZE_SB16X32;
write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col);
if ((mi_col + bw) < cm->mi_cols)
write_modes_b(cpi, m + bw, bc, tok, tok_end, mi_row, mi_col + bw);
break;
case PARTITION_SPLIT:
- // TODO(jingning): support recursive partitioning down to 16x16 as for
- // now. need to merge in 16x8, 8x16, 8x8, and smaller partitions.
- if (bsize == BLOCK_SIZE_SB64X64)
- subsize = BLOCK_SIZE_SB32X32;
- else if (bsize == BLOCK_SIZE_SB32X32)
- subsize = BLOCK_SIZE_MB16X16;
- else
- assert(0);
for (n = 0; n < 4; n++) {
int j = n >> 1, i = n & 0x01;
write_modes_sb(cpi, m + j * bs * mis + i * bs, bc, tok, tok_end,
@@ -1212,11 +1111,11 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc,
}
// update partition context
- if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_SB32X32))
+ if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16))
return;
- xd->left_seg_context = cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
- xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8);
+ xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3);
+ xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
update_partition_context(xd, subsize, bsize);
}
@@ -1233,12 +1132,12 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc,
for (mi_row = c->cur_tile_mi_row_start;
mi_row < c->cur_tile_mi_row_end;
- mi_row += (4 << CONFIG_SB8X8), m_ptr += (4 << CONFIG_SB8X8) * mis) {
+ mi_row += 8, m_ptr += 8 * mis) {
m = m_ptr;
vpx_memset(c->left_seg_context, 0, sizeof(c->left_seg_context));
for (mi_col = c->cur_tile_mi_col_start;
mi_col < c->cur_tile_mi_col_end;
- mi_col += (4 << CONFIG_SB8X8), m += (4 << CONFIG_SB8X8))
+ mi_col += 8, m += 8)
write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col,
BLOCK_SIZE_SB64X64);
}
@@ -1719,16 +1618,147 @@ static void segment_reference_frames(VP9_COMP *cpi) {
}
}
-void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
- unsigned long *size) {
+static void encode_loopfilter(MACROBLOCKD *xd, vp9_writer *w) {
+ int i;
+
+ // Write out loop filter deltas applied at the MB level based on mode or
+ // ref frame (if they are enabled).
+ vp9_write_bit(w, xd->mode_ref_lf_delta_enabled);
+
+ if (xd->mode_ref_lf_delta_enabled) {
+ // Do the deltas need to be updated
+ vp9_write_bit(w, xd->mode_ref_lf_delta_update);
+ if (xd->mode_ref_lf_delta_update) {
+ // Send update
+ for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
+ const int delta = xd->ref_lf_deltas[i];
+
+ // Frame level data
+ if (delta != xd->last_ref_lf_deltas[i]) {
+ xd->last_ref_lf_deltas[i] = delta;
+ vp9_write_bit(w, 1);
+
+ if (delta > 0) {
+ vp9_write_literal(w, delta & 0x3F, 6);
+ vp9_write_bit(w, 0); // sign
+ } else {
+ assert(delta < 0);
+ vp9_write_literal(w, (-delta) & 0x3F, 6);
+ vp9_write_bit(w, 1); // sign
+ }
+ } else {
+ vp9_write_bit(w, 0);
+ }
+ }
+
+ // Send update
+ for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
+ const int delta = xd->mode_lf_deltas[i];
+ if (delta != xd->last_mode_lf_deltas[i]) {
+ xd->last_mode_lf_deltas[i] = delta;
+ vp9_write_bit(w, 1);
+
+ if (delta > 0) {
+ vp9_write_literal(w, delta & 0x3F, 6);
+ vp9_write_bit(w, 0); // sign
+ } else {
+ assert(delta < 0);
+ vp9_write_literal(w, (-delta) & 0x3F, 6);
+ vp9_write_bit(w, 1); // sign
+ }
+ } else {
+ vp9_write_bit(w, 0);
+ }
+ }
+ }
+ }
+}
+
+static void encode_segmentation(VP9_COMP *cpi, vp9_writer *w) {
int i, j;
+ VP9_COMMON *const pc = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+
+ vp9_write_bit(w, xd->segmentation_enabled);
+ if (!xd->segmentation_enabled)
+ return;
+
+ // Segmentation map
+ vp9_write_bit(w, xd->update_mb_segmentation_map);
+#if CONFIG_IMPLICIT_SEGMENTATION
+ vp9_write_bit(w, xd->allow_implicit_segment_update);
+#endif
+ if (xd->update_mb_segmentation_map) {
+ // Select the coding strategy (temporal or spatial)
+ vp9_choose_segmap_coding_method(cpi);
+ // Write out probabilities used to decode unpredicted macro-block segments
+ for (i = 0; i < MB_SEG_TREE_PROBS; i++) {
+ const int prob = xd->mb_segment_tree_probs[i];
+ if (prob != MAX_PROB) {
+ vp9_write_bit(w, 1);
+ vp9_write_prob(w, prob);
+ } else {
+ vp9_write_bit(w, 0);
+ }
+ }
+
+ // Write out the chosen coding method.
+ vp9_write_bit(w, pc->temporal_update);
+ if (pc->temporal_update) {
+ for (i = 0; i < PREDICTION_PROBS; i++) {
+ const int prob = pc->segment_pred_probs[i];
+ if (prob != MAX_PROB) {
+ vp9_write_bit(w, 1);
+ vp9_write_prob(w, prob);
+ } else {
+ vp9_write_bit(w, 0);
+ }
+ }
+ }
+ }
+
+ // Segmentation data
+ vp9_write_bit(w, xd->update_mb_segmentation_data);
+ // segment_reference_frames(cpi);
+ if (xd->update_mb_segmentation_data) {
+ vp9_write_bit(w, xd->mb_segment_abs_delta);
+
+ for (i = 0; i < MAX_MB_SEGMENTS; i++) {
+ for (j = 0; j < SEG_LVL_MAX; j++) {
+ const int data = vp9_get_segdata(xd, i, j);
+ const int data_max = vp9_seg_feature_data_max(j);
+
+ if (vp9_segfeature_active(xd, i, j)) {
+ vp9_write_bit(w, 1);
+
+ if (vp9_is_segfeature_signed(j)) {
+ if (data < 0) {
+ vp9_encode_unsigned_max(w, -data, data_max);
+ vp9_write_bit(w, 1);
+ } else {
+ vp9_encode_unsigned_max(w, data, data_max);
+ vp9_write_bit(w, 0);
+ }
+ } else {
+ vp9_encode_unsigned_max(w, data, data_max);
+ }
+ } else {
+ vp9_write_bit(w, 0);
+ }
+ }
+ }
+ }
+}
+
+void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) {
+ int i;
VP9_HEADER oh;
VP9_COMMON *const pc = &cpi->common;
vp9_writer header_bc, residual_bc;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
int extra_bytes_packed = 0;
- unsigned char *cx_data = dest;
+ uint8_t *cx_data = dest;
oh.show_frame = (int) pc->show_frame;
oh.type = (int)pc->frame_type;
@@ -1798,60 +1828,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
}
#endif
- // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled).
- vp9_write_bit(&header_bc, (xd->mode_ref_lf_delta_enabled) ? 1 : 0);
-
- if (xd->mode_ref_lf_delta_enabled) {
- // Do the deltas need to be updated
- vp9_write_bit(&header_bc, xd->mode_ref_lf_delta_update);
- if (xd->mode_ref_lf_delta_update) {
- // Send update
- for (i = 0; i < MAX_REF_LF_DELTAS; i++) {
- const int delta = xd->ref_lf_deltas[i];
-
- // Frame level data
- if (delta != xd->last_ref_lf_deltas[i]) {
- xd->last_ref_lf_deltas[i] = delta;
- vp9_write_bit(&header_bc, 1);
-
- if (delta > 0) {
- vp9_write_literal(&header_bc, delta & 0x3F, 6);
- vp9_write_bit(&header_bc, 0); // sign
- } else {
- assert(delta < 0);
- vp9_write_literal(&header_bc, (-delta) & 0x3F, 6);
- vp9_write_bit(&header_bc, 1); // sign
- }
- } else {
- vp9_write_bit(&header_bc, 0);
- }
- }
-
- // Send update
- for (i = 0; i < MAX_MODE_LF_DELTAS; i++) {
- const int delta = xd->mode_lf_deltas[i];
-
- if (delta != xd->last_mode_lf_deltas[i]) {
- xd->last_mode_lf_deltas[i] = delta;
- vp9_write_bit(&header_bc, 1);
-
- if (delta > 0) {
- vp9_write_literal(&header_bc, delta & 0x3F, 6);
- vp9_write_bit(&header_bc, 0); // sign
- } else {
- assert(delta < 0);
- vp9_write_literal(&header_bc, (-delta) & 0x3F, 6);
- vp9_write_bit(&header_bc, 1); // sign
- }
- } else {
- vp9_write_bit(&header_bc, 0);
- }
- }
- }
- }
-
- // TODO(jkoleszar): remove these unused bits
- vp9_write_literal(&header_bc, 0, 2);
+ encode_loopfilter(xd, &header_bc);
// Frame Q baseline quantizer index
vp9_write_literal(&header_bc, pc->base_qindex, QINDEX_BITS);
@@ -1904,9 +1881,10 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
vp9_write_literal(&header_bc, cpi->gld_fb_idx, NUM_REF_FRAMES_LG2);
vp9_write_literal(&header_bc, cpi->alt_fb_idx, NUM_REF_FRAMES_LG2);
- // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer)
- vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]);
- vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
+ // Indicate the sign bias for each reference frame buffer.
+ for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) {
+ vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[LAST_FRAME + i]);
+ }
// Signal whether to allow high MV precision
vp9_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 1 : 0);
@@ -1960,87 +1938,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
active_section = 7;
#endif
- // Signal whether or not Segmentation is enabled
- vp9_write_bit(&header_bc, (xd->segmentation_enabled) ? 1 : 0);
-
- // Indicate which features are enabled
- if (xd->segmentation_enabled) {
- // Indicate whether or not the segmentation map is being updated.
- vp9_write_bit(&header_bc, (xd->update_mb_segmentation_map) ? 1 : 0);
-#if CONFIG_IMPLICIT_SEGMENTATION
- vp9_write_bit(&header_bc, (xd->allow_implicit_segment_update) ? 1 : 0);
-#endif
-
- // If it is, then indicate the method that will be used.
- if (xd->update_mb_segmentation_map) {
- // Select the coding strategy (temporal or spatial)
- vp9_choose_segmap_coding_method(cpi);
- // Send the tree probabilities used to decode unpredicted
- // macro-block segments
- for (i = 0; i < MB_SEG_TREE_PROBS; i++) {
- const int prob = xd->mb_segment_tree_probs[i];
- if (prob != 255) {
- vp9_write_bit(&header_bc, 1);
- vp9_write_prob(&header_bc, prob);
- } else {
- vp9_write_bit(&header_bc, 0);
- }
- }
-
- // Write out the chosen coding method.
- vp9_write_bit(&header_bc, (pc->temporal_update) ? 1 : 0);
- if (pc->temporal_update) {
- for (i = 0; i < PREDICTION_PROBS; i++) {
- const int prob = pc->segment_pred_probs[i];
- if (prob != 255) {
- vp9_write_bit(&header_bc, 1);
- vp9_write_prob(&header_bc, prob);
- } else {
- vp9_write_bit(&header_bc, 0);
- }
- }
- }
- }
-
- vp9_write_bit(&header_bc, (xd->update_mb_segmentation_data) ? 1 : 0);
-
- // segment_reference_frames(cpi);
-
- if (xd->update_mb_segmentation_data) {
- vp9_write_bit(&header_bc, (xd->mb_segment_abs_delta) ? 1 : 0);
-
- // For each segments id...
- for (i = 0; i < MAX_MB_SEGMENTS; i++) {
- // For each segmentation codable feature...
- for (j = 0; j < SEG_LVL_MAX; j++) {
- const int8_t data = vp9_get_segdata(xd, i, j);
- const int data_max = vp9_seg_feature_data_max(j);
-
- // If the feature is enabled...
- if (vp9_segfeature_active(xd, i, j)) {
- vp9_write_bit(&header_bc, 1);
-
- // Is the segment data signed..
- if (vp9_is_segfeature_signed(j)) {
- // Encode the relevant feature data
- if (data < 0) {
- vp9_encode_unsigned_max(&header_bc, -data, data_max);
- vp9_write_bit(&header_bc, 1);
- } else {
- vp9_encode_unsigned_max(&header_bc, data, data_max);
- vp9_write_bit(&header_bc, 0);
- }
- } else {
- // Unsigned data element so no sign bit needed
- vp9_encode_unsigned_max(&header_bc, data, data_max);
- }
- } else {
- vp9_write_bit(&header_bc, 0);
- }
- }
- }
- }
- }
+ encode_segmentation(cpi, &header_bc);
// Encode the common prediction model status flag probability updates for
// the reference frame
@@ -2153,15 +2051,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob);
vp9_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob);
vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob);
- vp9_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob);
- vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob);
vp9_copy(cpi->common.fc.pre_partition_prob, cpi->common.fc.partition_prob);
cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc;
#if CONFIG_COMP_INTERINTRA_PRED
cpi->common.fc.pre_interintra_prob = cpi->common.fc.interintra_prob;
#endif
vp9_zero(cpi->sub_mv_ref_count);
- vp9_zero(cpi->mbsplit_count);
vp9_zero(cpi->common.fc.mv_ref_ct);
update_coef_probs(cpi, &header_bc);
@@ -2173,9 +2068,6 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
active_section = 2;
#endif
- // TODO(jkoleszar): remove this unused bit
- vp9_write_bit(&header_bc, 1);
-
vp9_update_skip_probs(cpi);
for (i = 0; i < MBSKIP_CONTEXTS; ++i) {
vp9_write_prob(&header_bc, pc->mbskip_pred_probs[i]);
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 2c06457e7..6bc42c7ff 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -29,7 +29,7 @@ typedef struct {
B_PREDICTION_MODE mode;
int_mv mv;
int_mv second_mv;
- } bmi[16];
+ } bmi[4];
} PARTITION_INFO;
// Structure to hold snapshot of coding context during the mode picking process
@@ -117,7 +117,6 @@ struct macroblock {
int mbmode_cost[2][MB_MODE_COUNT];
int intra_uv_mode_cost[2][MB_MODE_COUNT];
int bmode_costs[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES];
- int i8x8_mode_costs[MB_MODE_COUNT];
int inter_bmode_costs[B_MODE_COUNT];
int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1]
[VP9_SWITCHABLE_FILTERS];
@@ -141,6 +140,9 @@ struct macroblock {
// Structure to hold context for each of the 4 MBs within a SB:
// when encoded as 4 independent MBs:
+ PICK_MODE_CONTEXT sb8_context[4][4][4];
+ PICK_MODE_CONTEXT sb8x16_context[4][4][2];
+ PICK_MODE_CONTEXT sb16x8_context[4][4][2];
PICK_MODE_CONTEXT mb_context[4][4];
PICK_MODE_CONTEXT sb32x16_context[4][2];
PICK_MODE_CONTEXT sb16x32_context[4][2];
@@ -157,12 +159,6 @@ struct macroblock {
void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
int y_blocks);
- void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2,
- int y_blocks);
- void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
- int y_blocks);
- void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
- int y_blocks);
};
#endif // VP9_ENCODER_VP9_BLOCK_H_
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 52065df52..6366d382f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -20,7 +20,6 @@
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/encoder/vp9_segmentation.h"
-#include "vp9/common/vp9_setupintrarecon.h"
#include "vp9/encoder/vp9_encodeintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_invtrans.h"
@@ -47,9 +46,6 @@ int enc_debug = 0;
void vp9_select_interp_filter_type(VP9_COMP *cpi);
-static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
- int output_enabled, int mi_row, int mi_col);
-
static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
int output_enabled, int mi_row, int mi_col,
BLOCK_SIZE_TYPE bsize);
@@ -380,6 +376,8 @@ static void update_state(VP9_COMP *cpi,
}
}
if (bsize < BLOCK_SIZE_SB32X32) {
+ if (bsize < BLOCK_SIZE_MB16X16)
+ ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8];
ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16];
}
@@ -387,19 +385,10 @@ static void update_state(VP9_COMP *cpi,
vpx_memcpy(x->partition_info, &ctx->partition_info,
sizeof(PARTITION_INFO));
- mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int;
- mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int;
-#if CONFIG_SB8X8
- vpx_memcpy(x->partition_info + mis, &ctx->partition_info,
- sizeof(PARTITION_INFO));
- vpx_memcpy(x->partition_info + 1, &ctx->partition_info,
- sizeof(PARTITION_INFO));
- vpx_memcpy(x->partition_info + mis + 1, &ctx->partition_info,
- sizeof(PARTITION_INFO));
- xd->mode_info_context[1].mbmi =
- xd->mode_info_context[mis].mbmi =
- xd->mode_info_context[1 + mis].mbmi = *mbmi;
-#endif
+ mbmi->mv[0].as_int =
+ x->partition_info->bmi[3].mv.as_int;
+ mbmi->mv[1].as_int =
+ x->partition_info->bmi[3].second_mv.as_int;
}
x->skip = ctx->skip;
@@ -453,7 +442,6 @@ static void update_state(VP9_COMP *cpi,
THR_D27_PRED /*D27_PRED*/,
THR_D63_PRED /*D63_PRED*/,
THR_TM /*TM_PRED*/,
- THR_I8X8_PRED /*I8X8_PRED*/,
THR_B_PRED /*I4X4_PRED*/,
};
cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++;
@@ -491,11 +479,13 @@ static void update_state(VP9_COMP *cpi,
mbmi->best_mv.as_int = best_mv.as_int;
mbmi->best_second_mv.as_int = best_second_mv.as_int;
vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv);
-#if CONFIG_SB8X8
- xd->mode_info_context[1].mbmi =
- xd->mode_info_context[mis].mbmi =
- xd->mode_info_context[1 + mis].mbmi = *mbmi;
-#endif
+ }
+
+ if (bsize > BLOCK_SIZE_SB8X8 && mbmi->mode == NEWMV) {
+ int i, j;
+ for (j = 0; j < bh; ++j)
+ for (i = 0; i < bw; ++i)
+ xd->mode_info_context[mis * j + i].mbmi = *mbmi;
}
#if CONFIG_COMP_INTERINTRA_PRED
if (mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV &&
@@ -567,8 +557,8 @@ static INLINE void set_partition_seg_context(VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8);
- xd->left_seg_context = cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3);
+ xd->above_seg_context = cm->above_seg_context + (mi_col >> 1);
+ xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3);
}
static void set_offsets(VP9_COMP *cpi,
@@ -580,17 +570,17 @@ static void set_offsets(VP9_COMP *cpi,
const int dst_fb_idx = cm->new_fb_idx;
const int idx_str = xd->mode_info_stride * mi_row + mi_col;
const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize);
- const int mb_row = mi_row >> CONFIG_SB8X8;
- const int mb_col = mi_col >> CONFIG_SB8X8;
+ const int mb_row = mi_row >> 1;
+ const int mb_col = mi_col >> 1;
const int idx_map = mb_row * cm->mb_cols + mb_col;
int i;
// entropy context structures
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].above_context = cm->above_context[i] +
- (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[i].subsampling_x));
+ (mi_col * 2 >> xd->plane[i].subsampling_x);
xd->plane[i].left_context = cm->left_context[i] +
- (((mi_row * 4 >> CONFIG_SB8X8) & 15) >> xd->plane[i].subsampling_y);
+ (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y);
}
// partition contexts
@@ -631,13 +621,11 @@ static void set_offsets(VP9_COMP *cpi,
/* segment ID */
if (xd->segmentation_enabled) {
- if (xd->update_mb_segmentation_map) {
- mbmi->segment_id = find_seg_id(cpi->segmentation_map, bsize,
- mi_row, cm->mi_rows, mi_col, cm->mi_cols);
- } else {
- mbmi->segment_id = find_seg_id(cm->last_frame_seg_map, bsize,
- mi_row, cm->mi_rows, mi_col, cm->mi_cols);
- }
+ uint8_t *map = xd->update_mb_segmentation_map ? cpi->segmentation_map
+ : cm->last_frame_seg_map;
+ mbmi->segment_id = find_seg_id(map, bsize, mi_row,
+ cm->mi_rows, mi_col, cm->mi_cols);
+
assert(mbmi->segment_id <= (MAX_MB_SEGMENTS-1));
vp9_mb_init_quantizer(cpi, x);
@@ -655,9 +643,9 @@ static void set_offsets(VP9_COMP *cpi,
const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
const int tile_progress =
- cm->cur_tile_mi_col_start * cm->mb_rows >> CONFIG_SB8X8;
+ cm->cur_tile_mi_col_start * cm->mb_rows >> 1;
const int mb_cols =
- (cm->cur_tile_mi_col_end - cm->cur_tile_mi_col_start) >> CONFIG_SB8X8;
+ (cm->cur_tile_mi_col_end - cm->cur_tile_mi_col_start) >> 1;
cpi->seg0_progress =
((y * mb_cols + x * 4 + p32 + p16 + tile_progress) << 16) / cm->MBs;
@@ -667,47 +655,6 @@ static void set_offsets(VP9_COMP *cpi,
}
}
-static int pick_mb_mode(VP9_COMP *cpi,
- int mi_row,
- int mi_col,
- TOKENEXTRA **tp,
- int *totalrate,
- int *totaldist) {
- VP9_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- int splitmodes_used = 0;
- MB_MODE_INFO *mbmi;
-
- set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_MB16X16);
-
- if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
- vp9_activity_masking(cpi, x);
-
- mbmi = &xd->mode_info_context->mbmi;
- mbmi->sb_type = BLOCK_SIZE_MB16X16;
-
- // Find best coding mode & reconstruct the MB so it is available
- // as a predictor for MBs that follow in the SB
- if (cm->frame_type == KEY_FRAME) {
- vp9_rd_pick_intra_mode(cpi, x, totalrate, totaldist);
-
- // Save the coding context
- vpx_memcpy(&x->mb_context[xd->sb_index][xd->mb_index].mic,
- xd->mode_info_context, sizeof(MODE_INFO));
- } else {
- vp9_pick_mode_inter_macroblock(cpi, x, mi_row, mi_col,
- totalrate, totaldist);
- splitmodes_used += (mbmi->mode == SPLITMV);
-
- if (cpi->mb.e_mbd.segmentation_enabled && mbmi->segment_id == 0) {
- cpi->seg0_idx++;
- }
- }
-
- return splitmodes_used;
-}
-
static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
TOKENEXTRA **tp, int *totalrate, int *totaldist,
BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) {
@@ -790,11 +737,10 @@ static void set_block_index(MACROBLOCKD *xd, int idx,
BLOCK_SIZE_TYPE bsize) {
if (bsize >= BLOCK_SIZE_SB32X32) {
xd->sb_index = idx;
- } else {
-#if CONFIG_SB8X8
- assert(bsize >= BLOCK_SIZE_MB16X16);
-#endif
+ } else if (bsize >= BLOCK_SIZE_MB16X16) {
xd->mb_index = idx;
+ } else {
+ xd->b_index = idx;
}
}
@@ -817,6 +763,12 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x,
return &x->sb16x32_context[xd->sb_index][xd->mb_index];
case BLOCK_SIZE_MB16X16:
return &x->mb_context[xd->sb_index][xd->mb_index];
+ case BLOCK_SIZE_SB16X8:
+ return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index];
+ case BLOCK_SIZE_SB8X16:
+ return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index];
+ case BLOCK_SIZE_SB8X8:
+ return &x->sb8_context[xd->sb_index][xd->mb_index][xd->b_index];
default:
assert(0);
return NULL;
@@ -837,14 +789,7 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
set_block_index(xd, sub_index, bsize);
set_offsets(cpi, mi_row, mi_col, bsize);
update_state(cpi, get_block_context(x, bsize), bsize, output_enabled);
- if (bsize == BLOCK_SIZE_MB16X16) {
- if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
- vp9_activity_masking(cpi, x);
-
- encode_macroblock(cpi, tp, output_enabled, mi_row, mi_col);
- } else {
- encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
- }
+ encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize);
if (output_enabled) {
update_stats(cpi, mi_row, mi_col);
@@ -857,22 +802,26 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp,
static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
int mi_row, int mi_col, int output_enabled,
BLOCK_SIZE_TYPE level,
- BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4]) {
+ BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4],
+ BLOCK_SIZE_TYPE c3[4][4]
+ ) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int bsl = mi_width_log2(level), bs = 1 << (bsl - 1);
const int bwl = mi_width_log2(c1), bhl = mi_height_log2(c1);
- int pl;
+ int UNINITIALIZED_IS_SAFE(pl);
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- set_partition_seg_context(cpi, mi_row, mi_col);
- pl = partition_plane_context(xd, level);
+ if (level > BLOCK_SIZE_SB8X8) {
+ set_partition_seg_context(cpi, mi_row, mi_col);
+ pl = partition_plane_context(xd, level);
+ }
if (bsl == bwl && bsl == bhl) {
- if (output_enabled && level > BLOCK_SIZE_MB16X16)
+ if (output_enabled && level > BLOCK_SIZE_SB8X8)
cpi->partition_count[pl][PARTITION_NONE]++;
encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1);
} else if (bsl == bhl && bsl > bwl) {
@@ -892,9 +841,11 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
assert(bwl < bsl && bhl < bsl);
if (level == BLOCK_SIZE_SB64X64) {
subsize = BLOCK_SIZE_SB32X32;
- } else {
- assert(level == BLOCK_SIZE_SB32X32);
+ } else if (level == BLOCK_SIZE_SB32X32) {
subsize = BLOCK_SIZE_MB16X16;
+ } else {
+ assert(level == BLOCK_SIZE_MB16X16);
+ subsize = BLOCK_SIZE_SB8X8;
}
if (output_enabled)
@@ -906,12 +857,12 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp,
set_block_index(xd, i, subsize);
encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs,
output_enabled, subsize,
- subsize == BLOCK_SIZE_MB16X16 ? c1 : c2[i], c2);
+ c2 ? c2[i] : c1, c3 ? c3[i] : NULL, NULL);
}
}
- if (level > BLOCK_SIZE_MB16X16 &&
- (level == BLOCK_SIZE_SB32X32 || bsl == bwl || bsl == bhl)) {
+ if (level > BLOCK_SIZE_SB8X8 &&
+ (level == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) {
set_partition_seg_context(cpi, mi_row, mi_col);
update_partition_context(xd, c1, level);
}
@@ -932,9 +883,11 @@ static void encode_sb_row(VP9_COMP *cpi,
// Code each SB in the row
for (mi_col = cm->cur_tile_mi_col_start;
- mi_col < cm->cur_tile_mi_col_end; mi_col += (4 << CONFIG_SB8X8)) {
+ mi_col < cm->cur_tile_mi_col_end; mi_col += 8) {
int i, p;
+ BLOCK_SIZE_TYPE mb_partitioning[4][4];
BLOCK_SIZE_TYPE sb_partitioning[4];
+ BLOCK_SIZE_TYPE sb64_partitioning = BLOCK_SIZE_SB32X32;
int sb64_rate = 0, sb64_dist = 0;
int sb64_skip = 0;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
@@ -943,23 +896,27 @@ static void encode_sb_row(VP9_COMP *cpi,
for (p = 0; p < MAX_MB_PLANE; p++) {
memcpy(a + 16 * p, cm->above_context[p] +
- (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)),
+ (mi_col * 2 >> xd->plane[p].subsampling_x),
sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
memcpy(l + 16 * p, cm->left_context[p],
sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
}
- memcpy(&seg_a, cm->above_seg_context + (mi_col >> CONFIG_SB8X8),
+ memcpy(&seg_a, cm->above_seg_context + (mi_col >> 1),
sizeof(seg_a));
memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l));
+
+ // FIXME(rbultje): this function should probably be rewritten to be
+ // recursive at some point in the future.
for (i = 0; i < 4; i++) {
- const int x_idx = (i & 1) << (1 + CONFIG_SB8X8);
- const int y_idx = (i & 2) << CONFIG_SB8X8;
+ const int x_idx = (i & 1) << 2;
+ const int y_idx = (i & 2) << 1;
int sb32_rate = 0, sb32_dist = 0;
int splitmodes_used = 0;
int sb32_skip = 0;
int j;
ENTROPY_CONTEXT l2[8 * MAX_MB_PLANE], a2[8 * MAX_MB_PLANE];
+ sb_partitioning[i] = BLOCK_SIZE_MB16X16;
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
@@ -969,22 +926,23 @@ static void encode_sb_row(VP9_COMP *cpi,
for (p = 0; p < MAX_MB_PLANE; p++) {
vpx_memcpy(l2 + 8 * p,
cm->left_context[p] +
- (y_idx * 4 >> (CONFIG_SB8X8 +
- xd->plane[p].subsampling_y)),
+ (y_idx * 2 >> xd->plane[p].subsampling_y),
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
vpx_memcpy(a2 + 8 * p,
cm->above_context[p] +
- ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 +
- xd->plane[p].subsampling_x)),
+ ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
}
/* Encode MBs in raster order within the SB */
- sb_partitioning[i] = BLOCK_SIZE_MB16X16;
for (j = 0; j < 4; j++) {
- const int x_idx_m = x_idx + ((j & 1) << CONFIG_SB8X8);
- const int y_idx_m = y_idx + ((j >> 1) << CONFIG_SB8X8);
+ const int x_idx_m = x_idx + ((j & 1) << 1);
+ const int y_idx_m = y_idx + ((j >> 1) << 1);
int r, d;
+ int r2, d2, mb16_rate = 0, mb16_dist = 0, k;
+ ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE];
+
+ mb_partitioning[i][j] = BLOCK_SIZE_SB8X8;
if (mi_row + y_idx_m >= cm->mi_rows ||
mi_col + x_idx_m >= cm->mi_cols) {
@@ -995,30 +953,165 @@ static void encode_sb_row(VP9_COMP *cpi,
// Index of the MB in the SB 0..3
xd->mb_index = j;
- splitmodes_used += pick_mb_mode(cpi, mi_row + y_idx_m,
- mi_col + x_idx_m, tp, &r, &d);
- sb32_rate += r;
- sb32_dist += d;
+ for (p = 0; p < MAX_MB_PLANE; p++) {
+ vpx_memcpy(l3 + 4 * p,
+ cm->left_context[p] +
+ (y_idx_m * 2 >> xd->plane[p].subsampling_y),
+ sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
+ vpx_memcpy(a3 + 4 * p,
+ cm->above_context[p] +
+ ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
+ sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
+ }
+
+ for (k = 0; k < 4; k++) {
+ xd->b_index = k;
+
+ // try 8x8 coding
+ pick_sb_modes(cpi, mi_row + y_idx_m + (k >> 1),
+ mi_col + x_idx_m + (k & 1),
+ tp, &r, &d, BLOCK_SIZE_SB8X8,
+ &x->sb8_context[xd->sb_index][xd->mb_index]
+ [xd->b_index]);
+ mb16_rate += r;
+ mb16_dist += d;
+ update_state(cpi, &x->sb8_context[xd->sb_index][xd->mb_index]
+ [xd->b_index],
+ BLOCK_SIZE_SB8X8, 0);
+ encode_superblock(cpi, tp,
+ 0, mi_row + y_idx_m + (k >> 1),
+ mi_col + x_idx_m + (k & 1),
+ BLOCK_SIZE_SB8X8);
+ }
+ set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m);
+ pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16);
+ mb16_rate += x->partition_cost[pl][PARTITION_SPLIT];
+ for (p = 0; p < MAX_MB_PLANE; p++) {
+ vpx_memcpy(cm->left_context[p] +
+ (y_idx_m * 2 >> xd->plane[p].subsampling_y),
+ l3 + 4 * p,
+ sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
+ vpx_memcpy(cm->above_context[p] +
+ ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
+ a3 + 4 * p,
+ sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
+ }
+
+ // try 8x16 coding
+ r2 = 0;
+ d2 = 0;
+ xd->b_index = 0;
+ pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m,
+ tp, &r, &d, BLOCK_SIZE_SB8X16,
+ &x->sb8x16_context[xd->sb_index][xd->mb_index]
+ [xd->b_index]);
+ r2 += r;
+ d2 += d;
+ update_state(cpi, &x->sb8x16_context[xd->sb_index][xd->mb_index]
+ [xd->b_index],
+ BLOCK_SIZE_SB8X16, 0);
+ encode_superblock(cpi, tp,
+ 0, mi_row + y_idx_m, mi_col + x_idx_m,
+ BLOCK_SIZE_SB8X16);
+ xd->b_index = 1;
+ pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m + 1,
+ tp, &r, &d, BLOCK_SIZE_SB8X16,
+ &x->sb8x16_context[xd->sb_index][xd->mb_index]
+ [xd->b_index]);
+ r2 += r;
+ d2 += d;
+ set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m);
+ pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16);
+ r2 += x->partition_cost[pl][PARTITION_VERT];
+ if (RDCOST(x->rdmult, x->rddiv, r2, d2) <
+ RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) {
+ mb16_rate = r2;
+ mb16_dist = d2;
+ mb_partitioning[i][j] = BLOCK_SIZE_SB8X16;
+ }
+ for (p = 0; p < MAX_MB_PLANE; p++) {
+ vpx_memcpy(cm->left_context[p] +
+ (y_idx_m * 2 >> xd->plane[p].subsampling_y),
+ l3 + 4 * p,
+ sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
+ vpx_memcpy(cm->above_context[p] +
+ ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
+ a3 + 4 * p,
+ sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
+ }
+
+ // try 16x8 coding
+ r2 = 0;
+ d2 = 0;
+ xd->b_index = 0;
+ pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m,
+ tp, &r, &d, BLOCK_SIZE_SB16X8,
+ &x->sb16x8_context[xd->sb_index][xd->mb_index]
+ [xd->b_index]);
+ r2 += r;
+ d2 += d;
+ update_state(cpi, &x->sb16x8_context[xd->sb_index][xd->mb_index]
+ [xd->b_index],
+ BLOCK_SIZE_SB16X8, 0);
+ encode_superblock(cpi, tp,
+ 0, mi_row + y_idx_m, mi_col + x_idx_m,
+ BLOCK_SIZE_SB16X8);
+ xd->b_index = 1;
+ pick_sb_modes(cpi, mi_row + y_idx_m + 1, mi_col + x_idx_m,
+ tp, &r, &d, BLOCK_SIZE_SB16X8,
+ &x->sb16x8_context[xd->sb_index][xd->mb_index]
+ [xd->b_index]);
+ r2 += r;
+ d2 += d;
+ set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m);
+ pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16);
+ r2 += x->partition_cost[pl][PARTITION_HORZ];
+ if (RDCOST(x->rdmult, x->rddiv, r2, d2) <
+ RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) {
+ mb16_rate = r2;
+ mb16_dist = d2;
+ mb_partitioning[i][j] = BLOCK_SIZE_SB16X8;
+ }
+ for (p = 0; p < MAX_MB_PLANE; p++) {
+ vpx_memcpy(cm->left_context[p] +
+ (y_idx_m * 2 >> xd->plane[p].subsampling_y),
+ l3 + 4 * p,
+ sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y);
+ vpx_memcpy(cm->above_context[p] +
+ ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x),
+ a3 + 4 * p,
+ sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x);
+ }
+
+ // try as 16x16
+ pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m,
+ tp, &r, &d, BLOCK_SIZE_MB16X16,
+ &x->mb_context[xd->sb_index][xd->mb_index]);
+ set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m);
+ pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16);
+ r += x->partition_cost[pl][PARTITION_NONE];
+ if (RDCOST(x->rdmult, x->rddiv, r, d) <
+ RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) {
+ mb16_rate = r;
+ mb16_dist = d;
+ mb_partitioning[i][j] = BLOCK_SIZE_MB16X16;
+ }
+ sb32_rate += mb16_rate;
+ sb32_dist += mb16_dist;
// Dummy encode, do not do the tokenization
-#if CONFIG_SB8X8
- update_state(cpi, &x->mb_context[xd->sb_index][xd->mb_index],
- BLOCK_SIZE_MB16X16, 0);
-#endif
- encode_macroblock(cpi, tp, 0, mi_row + y_idx_m,
- mi_col + x_idx_m);
+ encode_sb(cpi, tp, mi_row + y_idx_m, mi_col + x_idx_m, 0,
+ BLOCK_SIZE_MB16X16, mb_partitioning[i][j], NULL, NULL);
}
/* Restore L & A coding context to those in place on entry */
for (p = 0; p < MAX_MB_PLANE; p++) {
vpx_memcpy(cm->left_context[p] +
- (y_idx * 4 >> (CONFIG_SB8X8 +
- xd->plane[p].subsampling_y)),
+ (y_idx * 2 >> xd->plane[p].subsampling_y),
l2 + 8 * p,
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
vpx_memcpy(cm->above_context[p] +
- ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 +
- xd->plane[p].subsampling_x)),
+ ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
a2 + 8 * p,
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
}
@@ -1033,14 +1126,14 @@ static void encode_sb_row(VP9_COMP *cpi,
}
// check 32x16
- if (mi_col + x_idx + (2 << CONFIG_SB8X8) <= cm->mi_cols) {
+ if (mi_col + x_idx + 4 <= cm->mi_cols) {
int r, d;
xd->mb_index = 0;
pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx,
tp, &r, &d, BLOCK_SIZE_SB32X16,
&x->sb32x16_context[xd->sb_index][xd->mb_index]);
- if (mi_row + y_idx + (1 << CONFIG_SB8X8) < cm->mi_rows) {
+ if (mi_row + y_idx + 2 < cm->mi_rows) {
int r2, d2;
update_state(cpi, &x->sb32x16_context[xd->sb_index][xd->mb_index],
@@ -1049,7 +1142,7 @@ static void encode_sb_row(VP9_COMP *cpi,
0, mi_row + y_idx, mi_col + x_idx,
BLOCK_SIZE_SB32X16);
xd->mb_index = 1;
- pick_sb_modes(cpi, mi_row + y_idx + (1 << CONFIG_SB8X8),
+ pick_sb_modes(cpi, mi_row + y_idx + 2,
mi_col + x_idx, tp, &r2, &d2, BLOCK_SIZE_SB32X16,
&x->sb32x16_context[xd->sb_index][xd->mb_index]);
r += r2;
@@ -1070,27 +1163,25 @@ static void encode_sb_row(VP9_COMP *cpi,
for (p = 0; p < MAX_MB_PLANE; p++) {
vpx_memcpy(cm->left_context[p] +
- (y_idx * 4 >> (CONFIG_SB8X8 +
- xd->plane[p].subsampling_y)),
+ (y_idx * 2 >> xd->plane[p].subsampling_y),
l2 + 8 * p,
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
vpx_memcpy(cm->above_context[p] +
- ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 +
- xd->plane[p].subsampling_x)),
+ ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
a2 + 8 * p,
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
}
}
// check 16x32
- if (mi_row + y_idx + (2 << CONFIG_SB8X8) <= cm->mi_rows) {
+ if (mi_row + y_idx + 4 <= cm->mi_rows) {
int r, d;
xd->mb_index = 0;
pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx,
tp, &r, &d, BLOCK_SIZE_SB16X32,
&x->sb16x32_context[xd->sb_index][xd->mb_index]);
- if (mi_col + x_idx + (1 << CONFIG_SB8X8) < cm->mi_cols) {
+ if (mi_col + x_idx + 2 < cm->mi_cols) {
int r2, d2;
update_state(cpi, &x->sb16x32_context[xd->sb_index][xd->mb_index],
@@ -1100,7 +1191,7 @@ static void encode_sb_row(VP9_COMP *cpi,
BLOCK_SIZE_SB16X32);
xd->mb_index = 1;
pick_sb_modes(cpi, mi_row + y_idx,
- mi_col + x_idx + (1 << CONFIG_SB8X8),
+ mi_col + x_idx + 2,
tp, &r2, &d2, BLOCK_SIZE_SB16X32,
&x->sb16x32_context[xd->sb_index][xd->mb_index]);
r += r2;
@@ -1121,21 +1212,19 @@ static void encode_sb_row(VP9_COMP *cpi,
for (p = 0; p < MAX_MB_PLANE; p++) {
vpx_memcpy(cm->left_context[p] +
- (y_idx * 4 >> (CONFIG_SB8X8 +
- xd->plane[p].subsampling_y)),
+ (y_idx * 2 >> xd->plane[p].subsampling_y),
l2 + 8 * p,
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y);
vpx_memcpy(cm->above_context[p] +
- ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 +
- xd->plane[p].subsampling_x)),
+ ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x),
a2 + 8 * p,
sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x);
}
}
if (!sb32_skip &&
- mi_col + x_idx + (2 << CONFIG_SB8X8) <= cm->mi_cols &&
- mi_row + y_idx + (2 << CONFIG_SB8X8) <= cm->mi_rows) {
+ mi_col + x_idx + 4 <= cm->mi_cols &&
+ mi_row + y_idx + 4 <= cm->mi_rows) {
int r, d;
/* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
@@ -1170,18 +1259,19 @@ static void encode_sb_row(VP9_COMP *cpi,
// instead of small->big) means we can use as threshold for small, which
// may enable breakouts if RD is not good enough (i.e. faster)
encode_sb(cpi, tp, mi_row + y_idx, mi_col + x_idx, 0,
- BLOCK_SIZE_SB32X32, sb_partitioning[i], sb_partitioning);
+ BLOCK_SIZE_SB32X32, sb_partitioning[i], mb_partitioning[i],
+ NULL);
}
for (p = 0; p < MAX_MB_PLANE; p++) {
memcpy(cm->above_context[p] +
- (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)),
+ (mi_col * 2 >> xd->plane[p].subsampling_x),
a + 16 * p,
sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
memcpy(cm->left_context[p], l + 16 * p,
sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y);
}
- memcpy(cm->above_seg_context + (mi_col >> CONFIG_SB8X8), &seg_a,
+ memcpy(cm->above_seg_context + (mi_col >> 1), &seg_a,
sizeof(seg_a));
memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l));
@@ -1190,14 +1280,14 @@ static void encode_sb_row(VP9_COMP *cpi,
sb64_rate += x->partition_cost[pl][PARTITION_SPLIT];
// check 64x32
- if (mi_col + (4 << CONFIG_SB8X8) <= cm->mi_cols && !(cm->mb_rows & 1)) {
+ if (mi_col + 8 <= cm->mi_cols && !(cm->mb_rows & 1)) {
int r, d;
xd->sb_index = 0;
pick_sb_modes(cpi, mi_row, mi_col,
tp, &r, &d, BLOCK_SIZE_SB64X32,
&x->sb64x32_context[xd->sb_index]);
- if (mi_row + (2 << CONFIG_SB8X8) != cm->mi_rows) {
+ if (mi_row + 4 != cm->mi_rows) {
int r2, d2;
update_state(cpi, &x->sb64x32_context[xd->sb_index],
@@ -1205,7 +1295,7 @@ static void encode_sb_row(VP9_COMP *cpi,
encode_superblock(cpi, tp,
0, mi_row, mi_col, BLOCK_SIZE_SB64X32);
xd->sb_index = 1;
- pick_sb_modes(cpi, mi_row + (2 << CONFIG_SB8X8), mi_col,
+ pick_sb_modes(cpi, mi_row + 4, mi_col,
tp, &r2, &d2, BLOCK_SIZE_SB64X32,
&x->sb64x32_context[xd->sb_index]);
r += r2;
@@ -1221,12 +1311,12 @@ static void encode_sb_row(VP9_COMP *cpi,
RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
sb64_rate = r;
sb64_dist = d;
- sb_partitioning[0] = BLOCK_SIZE_SB64X32;
+ sb64_partitioning = BLOCK_SIZE_SB64X32;
}
for (p = 0; p < MAX_MB_PLANE; p++) {
memcpy(cm->above_context[p] +
- (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)),
+ (mi_col * 2 >> xd->plane[p].subsampling_x),
a + 16 * p,
sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
memcpy(cm->left_context[p], l + 16 * p,
@@ -1235,14 +1325,14 @@ static void encode_sb_row(VP9_COMP *cpi,
}
// check 32x64
- if (mi_row + (4 << CONFIG_SB8X8) <= cm->mi_rows && !(cm->mb_cols & 1)) {
+ if (mi_row + 8 <= cm->mi_rows && !(cm->mb_cols & 1)) {
int r, d;
xd->sb_index = 0;
pick_sb_modes(cpi, mi_row, mi_col,
tp, &r, &d, BLOCK_SIZE_SB32X64,
&x->sb32x64_context[xd->sb_index]);
- if (mi_col + (2 << CONFIG_SB8X8) != cm->mi_cols) {
+ if (mi_col + 4 != cm->mi_cols) {
int r2, d2;
update_state(cpi, &x->sb32x64_context[xd->sb_index],
@@ -1250,7 +1340,7 @@ static void encode_sb_row(VP9_COMP *cpi,
encode_superblock(cpi, tp,
0, mi_row, mi_col, BLOCK_SIZE_SB32X64);
xd->sb_index = 1;
- pick_sb_modes(cpi, mi_row, mi_col + (2 << CONFIG_SB8X8),
+ pick_sb_modes(cpi, mi_row, mi_col + 4,
tp, &r2, &d2, BLOCK_SIZE_SB32X64,
&x->sb32x64_context[xd->sb_index]);
r += r2;
@@ -1266,12 +1356,12 @@ static void encode_sb_row(VP9_COMP *cpi,
RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
sb64_rate = r;
sb64_dist = d;
- sb_partitioning[0] = BLOCK_SIZE_SB32X64;
+ sb64_partitioning = BLOCK_SIZE_SB32X64;
}
for (p = 0; p < MAX_MB_PLANE; p++) {
memcpy(cm->above_context[p] +
- (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)),
+ (mi_col * 2 >> xd->plane[p].subsampling_x),
a + 16 * p,
sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x);
memcpy(cm->left_context[p], l + 16 * p,
@@ -1280,8 +1370,8 @@ static void encode_sb_row(VP9_COMP *cpi,
}
if (!sb64_skip &&
- mi_col + (4 << CONFIG_SB8X8) <= cm->mi_cols &&
- mi_row + (4 << CONFIG_SB8X8) <= cm->mi_rows) {
+ mi_col + 8 <= cm->mi_cols &&
+ mi_row + 8 <= cm->mi_rows) {
int r, d;
pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d,
@@ -1295,13 +1385,13 @@ static void encode_sb_row(VP9_COMP *cpi,
RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) {
sb64_rate = r;
sb64_dist = d;
- sb_partitioning[0] = BLOCK_SIZE_SB64X64;
+ sb64_partitioning = BLOCK_SIZE_SB64X64;
}
}
assert(tp_orig == *tp);
- encode_sb(cpi, tp, mi_row, mi_col, 1,
- BLOCK_SIZE_SB64X64, sb_partitioning[0], sb_partitioning);
+ encode_sb(cpi, tp, mi_row, mi_col, 1, BLOCK_SIZE_SB64X64,
+ sb64_partitioning, sb_partitioning, mb_partitioning);
assert(tp_orig < *tp);
}
}
@@ -1333,9 +1423,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
0, 0, NULL, NULL);
setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0);
- // set up frame for intra coded blocks
- vp9_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
-
vp9_build_block_offsets(x);
vp9_setup_block_dptrs(&x->e_mbd);
@@ -1346,10 +1433,8 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
vp9_zero(cpi->count_mb_ref_frame_usage)
vp9_zero(cpi->bmode_count)
vp9_zero(cpi->ymode_count)
- vp9_zero(cpi->i8x8_mode_count)
vp9_zero(cpi->y_uv_mode_count)
vp9_zero(cpi->sub_mv_ref_count)
- vp9_zero(cpi->mbsplit_count)
vp9_zero(cpi->common.fc.mv_ref_ct)
vp9_zero(cpi->sb_ymode_count)
vp9_zero(cpi->partition_count);
@@ -1487,7 +1572,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_get_tile_col_offsets(cm, tile_col);
for (mi_row = cm->cur_tile_mi_row_start;
mi_row < cm->cur_tile_mi_row_end;
- mi_row += (4 << CONFIG_SB8X8)) {
+ mi_row += 8) {
encode_sb_row(cpi, mi_row, &tp, &totalrate);
}
cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old);
@@ -1616,9 +1701,11 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi,
assert(bwl < bsl && bhl < bsl);
if (bsize == BLOCK_SIZE_SB64X64) {
subsize = BLOCK_SIZE_SB32X32;
- } else {
- assert(bsize == BLOCK_SIZE_SB32X32);
+ } else if (bsize == BLOCK_SIZE_SB32X32) {
subsize = BLOCK_SIZE_MB16X16;
+ } else {
+ assert(bsize == BLOCK_SIZE_MB16X16);
+ subsize = BLOCK_SIZE_SB8X8;
}
for (n = 0; n < 4; n++) {
@@ -1638,10 +1725,10 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
MODE_INFO *mi, *mi_ptr = cm->mi;
for (mi_row = 0; mi_row < cm->mi_rows;
- mi_row += (4 << CONFIG_SB8X8), mi_ptr += (4 << CONFIG_SB8X8) * mis) {
+ mi_row += 8, mi_ptr += 8 * mis) {
mi = mi_ptr;
for (mi_col = 0; mi_col < cm->mi_cols;
- mi_col += (4 << CONFIG_SB8X8), mi += (4 << CONFIG_SB8X8)) {
+ mi_col += 8, mi += 8) {
reset_skip_txfm_size_sb(cpi, mi, txfm_max,
mi_row, mi_col, BLOCK_SIZE_SB64X64);
}
@@ -1823,30 +1910,16 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
do {
++ bct[xd->block[b].bmi.as_mode.first];
- } while (++b < 16);
- }
-
- if (m == I8X8_PRED) {
- i8x8_modes[xd->block[0].bmi.as_mode.first]++;
- i8x8_modes[xd->block[2].bmi.as_mode.first]++;
- i8x8_modes[xd->block[8].bmi.as_mode.first]++;
- i8x8_modes[xd->block[10].bmi.as_mode.first]++;
+ } while (++b < 4);
}
#endif
- if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_MB16X16) {
+ if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_SB8X8) {
++cpi->sb_ymode_count[m];
} else {
++cpi->ymode_count[m];
}
- if (m != I8X8_PRED)
++cpi->y_uv_mode_count[m][uvm];
- else {
- cpi->i8x8_mode_count[xd->mode_info_context->bmi[0].as_mode.first]++;
- cpi->i8x8_mode_count[xd->mode_info_context->bmi[2].as_mode.first]++;
- cpi->i8x8_mode_count[xd->mode_info_context->bmi[8].as_mode.first]++;
- cpi->i8x8_mode_count[xd->mode_info_context->bmi[10].as_mode.first]++;
- }
if (m == I4X4_PRED) {
int b = 0;
do {
@@ -1855,7 +1928,7 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) {
if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS;
#endif
++cpi->bmode_count[m];
- } while (++b < 16);
+ } while (++b < 4);
}
}
@@ -1880,254 +1953,6 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) {
#endif
}
-static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
- int output_enabled,
- int mi_row, int mi_col) {
- VP9_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->mb;
- MACROBLOCKD *const xd = &x->e_mbd;
- MODE_INFO *mi = xd->mode_info_context;
- MB_MODE_INFO *const mbmi = &mi->mbmi;
- const int mis = cm->mode_info_stride;
-#if CONFIG_SB8X8
- int n;
-#endif
-
- assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_MB16X16);
-
-#ifdef ENC_DEBUG
- enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
- mb_row == 8 && mb_col == 0 && output_enabled);
- if (enc_debug)
- printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled);
-#endif
- if (cm->frame_type == KEY_FRAME) {
- if (cpi->oxcf.tuning == VP8_TUNE_SSIM && output_enabled) {
- // Adjust the zbin based on this MB rate.
- adjust_act_zbin(cpi, x);
- vp9_update_zbin_extra(cpi, x);
- }
- } else {
- vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
-
- if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
- // Adjust the zbin based on this MB rate.
- adjust_act_zbin(cpi, x);
- }
-
- // Experimental code. Special case for gf and arf zeromv modes.
- // Increase zbin size to suppress noise
- cpi->zbin_mode_boost = 0;
- if (cpi->zbin_mode_boost_enabled) {
- if (mbmi->ref_frame != INTRA_FRAME) {
- if (mbmi->mode == ZEROMV) {
- if (mbmi->ref_frame != LAST_FRAME)
- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
- else
- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
- } else if (mbmi->mode == SPLITMV)
- cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
- else
- cpi->zbin_mode_boost = MV_ZBIN_BOOST;
- } else {
- cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
- }
- }
-
- vp9_update_zbin_extra(cpi, x);
- }
-
- if (mbmi->ref_frame == INTRA_FRAME) {
-#if 0 // def ENC_DEBUG
- if (enc_debug) {
- printf("Mode %d skip %d tx_size %d\n", mbmi->mode, x->skip,
- mbmi->txfm_size);
- }
-#endif
- if (mbmi->mode == I4X4_PRED) {
- vp9_encode_intra16x16mbuv(cm, x);
- vp9_encode_intra4x4mby(x, BLOCK_SIZE_MB16X16);
- } else if (mbmi->mode == I8X8_PRED) {
- vp9_encode_intra8x8mby(x);
- vp9_encode_intra8x8mbuv(x);
- } else {
- vp9_encode_intra16x16mbuv(cm, x);
- vp9_encode_intra16x16mby(cm, x);
- }
-
- if (output_enabled)
- sum_intra_stats(cpi, x);
- } else {
- int ref_fb_idx, second_ref_fb_idx;
-#ifdef ENC_DEBUG
- if (enc_debug)
- printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n",
- mbmi->mode, x->skip, mbmi->txfm_size,
- mbmi->ref_frame, mbmi->second_ref_frame,
- mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
- mbmi->interp_filter);
-#endif
-
- assert(cm->frame_type != KEY_FRAME);
-
- if (mbmi->ref_frame == LAST_FRAME)
- ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
- else if (mbmi->ref_frame == GOLDEN_FRAME)
- ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
- else
- ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
-
- if (mbmi->second_ref_frame > 0) {
- if (mbmi->second_ref_frame == LAST_FRAME)
- second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
- else if (mbmi->second_ref_frame == GOLDEN_FRAME)
- second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
- else
- second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
- }
-
- setup_pre_planes(xd,
- &cpi->common.yv12_fb[ref_fb_idx],
- mbmi->second_ref_frame > 0 ? &cpi->common.yv12_fb[second_ref_fb_idx]
- : NULL,
- mi_row, mi_col, xd->scale_factor, xd->scale_factor_uv);
-
- if (!x->skip) {
- vp9_encode_inter16x16(cm, x, mi_row, mi_col);
- } else {
- vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16);
-#if CONFIG_COMP_INTERINTRA_PRED
- if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) {
- vp9_build_interintra_predictors(xd,
- xd->plane[0].dst.buf,
- xd->plane[1].dst.buf,
- xd->plane[2].dst.buf,
- xd->plane[0].dst.stride,
- xd->plane[1].dst.stride,
- BLOCK_SIZE_MB16X16);
- }
-#endif
- }
- }
-
- if (!x->skip) {
-#ifdef ENC_DEBUG
- if (enc_debug) {
- int i, j;
- printf("\n");
- printf("qcoeff\n");
- for (i = 0; i < 384; i++) {
- printf("%3d ", xd->qcoeff[i]);
- if (i % 16 == 15) printf("\n");
- }
- printf("\n");
- printf("predictor\n");
- for (i = 0; i < 384; i++) {
- printf("%3d ", xd->predictor[i]);
- if (i % 16 == 15) printf("\n");
- }
- printf("\n");
- printf("src_diff\n");
- for (i = 0; i < 384; i++) {
- printf("%3d ", x->src_diff[i]);
- if (i % 16 == 15) printf("\n");
- }
- printf("\n");
- printf("diff\n");
- for (i = 0; i < 384; i++) {
- printf("%3d ", xd->block[0].diff[i]);
- if (i % 16 == 15) printf("\n");
- }
- printf("\n");
- printf("final y\n");
- for (i = 0; i < 16; i++) {
- for (j = 0; j < 16; j++)
- printf("%3d ", xd->plane[0].dst.buf[i * xd->plane[0].dst.stride + j]);
- printf("\n");
- }
- printf("\n");
- printf("final u\n");
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++)
- printf("%3d ", xd->plane[1].dst.buf[i * xd->plane[1].dst.stride + j]);
- printf("\n");
- }
- printf("\n");
- printf("final v\n");
- for (i = 0; i < 8; i++) {
- for (j = 0; j < 8; j++)
- printf("%3d ", xd->plane[2].dst.buf[i * xd->plane[1].dst.stride + j]);
- printf("\n");
- }
- fflush(stdout);
- }
-#endif
-
- vp9_tokenize_sb(cpi, xd, t, !output_enabled, BLOCK_SIZE_MB16X16);
- } else {
- // FIXME(rbultje): not tile-aware (mi - 1)
- int mb_skip_context =
- (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff;
-
- mbmi->mb_skip_coeff = 1;
- if (output_enabled)
- cpi->skip_true_count[mb_skip_context]++;
- vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16);
- }
-
-#if CONFIG_SB8X8
- // copy skip flag on all mb_mode_info contexts in this SB
- // if this was a skip at this txfm size
- for (n = 1; n < 4; n++) {
- const int x_idx = n & 1, y_idx = n >> 1;
- if (mi_col + x_idx < cm->mi_cols && mi_row + y_idx < cm->mi_rows)
- mi[x_idx + y_idx * mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
- }
-#endif
-
- if (output_enabled) {
- int segment_id = mbmi->segment_id;
- if (cpi->common.txfm_mode == TX_MODE_SELECT &&
- !(mbmi->mb_skip_coeff ||
- vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP))) {
- assert(mbmi->txfm_size <= TX_16X16);
- if (mbmi->mode != I4X4_PRED && mbmi->mode != I8X8_PRED &&
- mbmi->mode != SPLITMV) {
- cpi->txfm_count_16x16p[mbmi->txfm_size]++;
- } else if (mbmi->mode == I8X8_PRED ||
- (mbmi->mode == SPLITMV &&
- mbmi->partitioning != PARTITIONING_4X4)) {
- cpi->txfm_count_8x8p[mbmi->txfm_size]++;
- }
- } else {
-#if CONFIG_SB8X8
- int y, x;
-#endif
- if (mbmi->mode != I4X4_PRED && mbmi->mode != I8X8_PRED &&
- mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) {
- mbmi->txfm_size = TX_16X16;
- } else if (mbmi->mode != I4X4_PRED &&
- !(mbmi->mode == SPLITMV &&
- mbmi->partitioning == PARTITIONING_4X4) &&
- cpi->common.txfm_mode >= ALLOW_8X8) {
- mbmi->txfm_size = TX_8X8;
- } else {
- mbmi->txfm_size = TX_4X4;
- }
-
-#if CONFIG_SB8X8
- for (y = 0; y < 2; y++) {
- for (x = !y; x < 2; x++) {
- if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) {
- mi[mis * y + x].mbmi.txfm_size = mbmi->txfm_size;
- }
- }
- }
-#endif
- }
- }
-}
-
static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
int output_enabled, int mi_row, int mi_col,
BLOCK_SIZE_TYPE bsize) {
@@ -2177,7 +2002,17 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
vp9_update_zbin_extra(cpi, x);
}
- if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+ if (xd->mode_info_context->mbmi.mode == I4X4_PRED) {
+ assert(bsize == BLOCK_SIZE_SB8X8 &&
+ xd->mode_info_context->mbmi.txfm_size == TX_4X4);
+
+ vp9_encode_intra4x4mby(x, bsize);
+ vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize);
+ vp9_encode_sbuv(cm, x, bsize);
+
+ if (output_enabled)
+ sum_intra_stats(cpi, x);
+ } else if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize);
vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize);
if (output_enabled)
@@ -2212,83 +2047,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
}
- if (!x->skip) {
- vp9_subtract_sb(x, bsize);
-
- switch (xd->mode_info_context->mbmi.txfm_size) {
- case TX_32X32:
- vp9_transform_sby_32x32(x, bsize);
- vp9_quantize_sby_32x32(x, bsize);
- if (bsize == BLOCK_SIZE_SB64X64) {
- vp9_transform_sbuv_32x32(x, bsize);
- vp9_quantize_sbuv_32x32(x, bsize);
- } else {
- vp9_transform_sbuv_16x16(x, bsize);
- vp9_quantize_sbuv_16x16(x, bsize);
- }
- if (x->optimize) {
- vp9_optimize_sby_32x32(cm, x, bsize);
- if (bsize == BLOCK_SIZE_SB64X64)
- vp9_optimize_sbuv_32x32(cm, x, bsize);
- else
- vp9_optimize_sbuv_16x16(cm, x, bsize);
- }
- vp9_inverse_transform_sby_32x32(xd, bsize);
- if (bsize == BLOCK_SIZE_SB64X64)
- vp9_inverse_transform_sbuv_32x32(xd, bsize);
- else
- vp9_inverse_transform_sbuv_16x16(xd, bsize);
- break;
- case TX_16X16:
- vp9_transform_sby_16x16(x, bsize);
- vp9_quantize_sby_16x16(x, bsize);
- if (bsize >= BLOCK_SIZE_SB32X32) {
- vp9_transform_sbuv_16x16(x, bsize);
- vp9_quantize_sbuv_16x16(x, bsize);
- } else {
- vp9_transform_sbuv_8x8(x, bsize);
- vp9_quantize_sbuv_8x8(x, bsize);
- }
- if (x->optimize) {
- vp9_optimize_sby_16x16(cm, x, bsize);
- if (bsize >= BLOCK_SIZE_SB32X32)
- vp9_optimize_sbuv_16x16(cm, x, bsize);
- else
- vp9_optimize_sbuv_8x8(cm, x, bsize);
- }
- vp9_inverse_transform_sby_16x16(xd, bsize);
- if (bsize >= BLOCK_SIZE_SB32X32)
- vp9_inverse_transform_sbuv_16x16(xd, bsize);
- else
- vp9_inverse_transform_sbuv_8x8(xd, bsize);
- break;
- case TX_8X8:
- vp9_transform_sby_8x8(x, bsize);
- vp9_transform_sbuv_8x8(x, bsize);
- vp9_quantize_sby_8x8(x, bsize);
- vp9_quantize_sbuv_8x8(x, bsize);
- if (x->optimize) {
- vp9_optimize_sby_8x8(cm, x, bsize);
- vp9_optimize_sbuv_8x8(cm, x, bsize);
- }
- vp9_inverse_transform_sby_8x8(xd, bsize);
- vp9_inverse_transform_sbuv_8x8(xd, bsize);
- break;
- case TX_4X4:
- vp9_transform_sby_4x4(x, bsize);
- vp9_transform_sbuv_4x4(x, bsize);
- vp9_quantize_sby_4x4(x, bsize);
- vp9_quantize_sbuv_4x4(x, bsize);
- if (x->optimize) {
- vp9_optimize_sby_4x4(cm, x, bsize);
- vp9_optimize_sbuv_4x4(cm, x, bsize);
- }
- vp9_inverse_transform_sby_4x4(xd, bsize);
- vp9_inverse_transform_sbuv_4x4(xd, bsize);
- break;
- default: assert(0);
- }
- vp9_recon_sb_c(xd, bsize);
+ if (xd->mode_info_context->mbmi.mode == I4X4_PRED) {
+ assert(bsize == BLOCK_SIZE_SB8X8);
+ vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize);
+ } else if (!x->skip) {
+ vp9_encode_sb(cm, x, bsize);
vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize);
} else {
// FIXME(rbultje): not tile-aware (mi - 1)
@@ -2315,8 +2078,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) {
if (bsize >= BLOCK_SIZE_SB32X32) {
cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
- } else {
+ } else if (bsize >= BLOCK_SIZE_MB16X16) {
cpi->txfm_count_16x16p[mi->mbmi.txfm_size]++;
+ } else {
+ cpi->txfm_count_8x8p[mi->mbmi.txfm_size]++;
}
} else {
int x, y;
@@ -2324,6 +2089,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32)
sz = TX_16X16;
+ if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16)
+ sz = TX_8X8;
+ if (sz == TX_8X8 && (xd->mode_info_context->mbmi.mode == SPLITMV ||
+ xd->mode_info_context->mbmi.mode == I4X4_PRED))
+ sz = TX_4X4;
for (y = 0; y < bh; y++) {
for (x = 0; x < bw; x++) {
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index f6ddca8f4..72a6603f8 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -57,36 +57,37 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib,
raster_block_offset_int16(xd, bsize, 0, ib,
xd->plane[0].diff);
int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16);
+ const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
- assert(ib < 16);
+ assert(ib < (1 << (bwl + bhl)));
#if CONFIG_NEWBINTRAMODES
xd->mode_info_context->bmi[ib].as_mode.context =
vp9_find_bpred_context(&x->e_mbd, ib, dst, xd->plane[0].dst.stride);
#endif
- vp9_intra4x4_predict(&x->e_mbd, ib,
+ vp9_intra4x4_predict(&x->e_mbd, ib, bsize,
xd->mode_info_context->bmi[ib].as_mode.first,
dst, xd->plane[0].dst.stride);
- vp9_subtract_block(4, 4, src_diff, 16,
+ vp9_subtract_block(4, 4, src_diff, 4 << bwl,
src, x->plane[0].src.stride,
dst, xd->plane[0].dst.stride);
tx_type = get_tx_type_4x4(&x->e_mbd, ib);
if (tx_type != DCT_DCT) {
- vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
+ vp9_short_fht4x4(src_diff, coeff, 4 << bwl, tx_type);
x->quantize_b_4x4(x, ib, tx_type, 16);
vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
- diff, 16, tx_type);
+ diff, 4 << bwl, tx_type);
} else {
- x->fwd_txm4x4(src_diff, coeff, 32);
+ x->fwd_txm4x4(src_diff, coeff, 8 << bwl);
x->quantize_b_4x4(x, ib, tx_type, 16);
vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[0].eobs[ib],
BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
- diff, 32);
+ diff, 8 << bwl);
}
- vp9_recon_b(dst, diff, dst, xd->plane[0].dst.stride);
+ vp9_recon_b(dst, diff, 4 << bwl, dst, xd->plane[0].dst.stride);
}
void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bsize) {
@@ -100,207 +101,14 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bsize) {
void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
- TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
vp9_build_intra_predictors_sby_s(xd, BLOCK_SIZE_MB16X16);
- vp9_subtract_sby(x, BLOCK_SIZE_MB16X16);
-
- switch (tx_size) {
- case TX_16X16:
- vp9_transform_sby_16x16(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16);
- if (x->optimize)
- vp9_optimize_sby_16x16(cm, x, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16);
- break;
- case TX_8X8:
- vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16);
- if (x->optimize)
- vp9_optimize_sby_8x8(cm, x, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16);
- break;
- default:
- vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16);
- if (x->optimize)
- vp9_optimize_sby_4x4(cm, x, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16);
- break;
- }
-
- vp9_recon_sby(xd, BLOCK_SIZE_MB16X16);
+ vp9_encode_sby(cm, x, BLOCK_SIZE_MB16X16);
}
void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) {
MACROBLOCKD *xd = &x->e_mbd;
- TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16);
- vp9_subtract_sbuv(x, BLOCK_SIZE_MB16X16);
-
- switch (tx_size) {
- case TX_4X4:
- vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
- if (x->optimize)
- vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16);
- break;
- default: // 16x16 or 8x8
- vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
- if (x->optimize)
- vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16);
- break;
- }
-
- vp9_recon_sbuv(xd, BLOCK_SIZE_MB16X16);
-}
-
-void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
- MACROBLOCKD *xd = &x->e_mbd;
- uint8_t* const src =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
- x->plane[0].src.buf, x->plane[0].src.stride);
- int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
- x->plane[0].src_diff);
- int16_t* const diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
- xd->plane[0].diff);
- uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
- xd->plane[0].dst.buf, xd->plane[0].dst.stride);
- const int iblock[4] = {0, 1, 4, 5};
- int i;
- TX_TYPE tx_type;
-
- vp9_intra8x8_predict(xd, ib, xd->mode_info_context->bmi[ib].as_mode.first,
- dst, xd->plane[0].dst.stride);
- // generate residual blocks
- vp9_subtract_block(8, 8, src_diff, 16,
- src, x->plane[0].src.stride,
- dst, xd->plane[0].dst.stride);
-
- if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
- int idx = (ib & 0x02) ? (ib + 2) : ib;
- int16_t* const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16);
- int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16);
-
- assert(idx < 16);
- tx_type = get_tx_type_8x8(xd, ib);
- if (tx_type != DCT_DCT) {
- vp9_short_fht8x8(src_diff, coeff, 16, tx_type);
- x->quantize_b_8x8(x, idx, tx_type, 16);
- vp9_short_iht8x8(dqcoeff, diff, 16, tx_type);
- } else {
- x->fwd_txm8x8(src_diff, coeff, 32);
- x->quantize_b_8x8(x, idx, DCT_DCT, 16);
- vp9_short_idct8x8(dqcoeff, diff, 32);
- }
- } else {
- for (i = 0; i < 4; i++) {
- int idx = ib + iblock[i];
- int16_t* const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16);
- int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16);
- int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, idx,
- x->plane[0].src_diff);
- int16_t* const diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, idx,
- xd->plane[0].diff);
-
- assert(idx < 16);
- tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
- if (tx_type != DCT_DCT) {
- vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
- x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
- vp9_short_iht4x4(dqcoeff, diff, 16, tx_type);
- } else if (!(i & 1) &&
- get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
- x->fwd_txm8x4(src_diff, coeff, 32);
- x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16);
- vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]],
- dqcoeff, diff, 32);
- vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i] + 1],
- dqcoeff + 16, diff + 4, 32);
- i++;
- } else {
- x->fwd_txm4x4(src_diff, coeff, 32);
- x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
- vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]],
- dqcoeff, diff, 32);
- }
- }
- }
-
- // reconstruct submacroblock
- for (i = 0; i < 4; i++) {
- int16_t* const diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib + iblock[i],
- xd->plane[0].diff);
- uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib + iblock[i],
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride);
- vp9_recon_b_c(dst, diff, dst, xd->plane[0].dst.stride);
- }
-}
-
-void vp9_encode_intra8x8mby(MACROBLOCK *x) {
- int i;
-
- for (i = 0; i < 4; i++)
- vp9_encode_intra8x8(x, vp9_i8x8_block[i]);
-}
-
-static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) {
- MACROBLOCKD * const xd = &x->e_mbd;
- int16_t * const dqcoeff = MB_SUBBLOCK_FIELD(xd, dqcoeff, ib);
- int16_t* const coeff = MB_SUBBLOCK_FIELD(x, coeff, ib);
- const int plane = ib < 20 ? 1 : 2;
- const int block = ib < 20 ? ib - 16 : ib - 20;
- uint8_t* const src =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, plane, block,
- x->plane[plane].src.buf,
- x->plane[plane].src.stride);
- int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, plane, block,
- x->plane[plane].src_diff);
- int16_t* const diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, plane, block,
- xd->plane[plane].diff);
- uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, plane, block,
- xd->plane[plane].dst.buf,
- xd->plane[plane].dst.stride);
-
- assert(ib >= 16 && ib < 24);
- vp9_intra_uv4x4_predict(&x->e_mbd, ib, mode,
- dst, xd->plane[plane].dst.stride);
-
- assert(xd->plane[1].subsampling_x == 1);
- vp9_subtract_block(4, 4, src_diff, 8,
- src, x->plane[plane].src.stride,
- dst, xd->plane[plane].dst.stride);
-
- x->fwd_txm4x4(src_diff, coeff, 16);
- x->quantize_b_4x4(x, ib, DCT_DCT, 16);
- vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block],
- dqcoeff, diff, 16);
-
- vp9_recon_uv_b_c(dst, diff, dst, xd->plane[plane].dst.stride);
-}
-
-void vp9_encode_intra8x8mbuv(MACROBLOCK *x) {
- int i;
-
- for (i = 0; i < 4; i++) {
- int mode = x->e_mbd.mode_info_context->bmi[vp9_i8x8_block[i]].as_mode.first;
-
- encode_intra_uv4x4(x, i + 16, mode); // u
- encode_intra_uv4x4(x, i + 20, mode); // v
- }
+ vp9_encode_sbuv(cm, x, BLOCK_SIZE_MB16X16);
}
diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h
index 7ec2f11d4..c26200494 100644
--- a/vp9/encoder/vp9_encodeintra.h
+++ b/vp9/encoder/vp9_encodeintra.h
@@ -17,8 +17,4 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bs);
-void vp9_encode_intra8x8mby(MACROBLOCK *x);
-void vp9_encode_intra8x8mbuv(MACROBLOCK *x);
-void vp9_encode_intra8x8(MACROBLOCK *x, int ib);
-
#endif // VP9_ENCODER_VP9_ENCODEINTRA_H_
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 0cb1ae958..421052753 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -67,143 +67,6 @@ void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
}
-void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 3);
- const int stride = 32 << bwl;
- int n;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
-
- vp9_short_fdct32x32(x->plane[0].src_diff + y_idx * stride * 32 + x_idx * 32,
- x->plane[0].coeff + n * 1024, stride * 2);
- }
-}
-
-void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 2);
- const int stride = 16 << bwl, bstride = 4 << bwl;
- MACROBLOCKD *const xd = &x->e_mbd;
- int n;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- const TX_TYPE tx_type = get_tx_type_16x16(xd,
- (y_idx * bstride + x_idx) * 4);
-
- if (tx_type != DCT_DCT) {
- vp9_short_fht16x16(x->plane[0].src_diff +
- y_idx * stride * 16 + x_idx * 16,
- x->plane[0].coeff + n * 256, stride, tx_type);
- } else {
- x->fwd_txm16x16(x->plane[0].src_diff + y_idx * stride * 16 + x_idx * 16,
- x->plane[0].coeff + n * 256, stride * 2);
- }
- }
-}
-
-void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 1);
- const int stride = 8 << bwl, bstride = 2 << bwl;
- MACROBLOCKD *const xd = &x->e_mbd;
- int n;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2);
-
- if (tx_type != DCT_DCT) {
- vp9_short_fht8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8,
- x->plane[0].coeff + n * 64, stride, tx_type);
- } else {
- x->fwd_txm8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8,
- x->plane[0].coeff + n * 64, stride * 2);
- }
- }
-}
-
-void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize), bw = 1 << bwl;
- const int bh = 1 << b_height_log2(bsize);
- const int stride = 4 << bwl;
- MACROBLOCKD *const xd = &x->e_mbd;
- int n;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
-
- if (tx_type != DCT_DCT) {
- vp9_short_fht4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4,
- x->plane[0].coeff + n * 16, stride, tx_type);
- } else {
- x->fwd_txm4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4,
- x->plane[0].coeff + n * 16, stride * 2);
- }
- }
-}
-
-void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- assert(bsize == BLOCK_SIZE_SB64X64);
- vp9_clear_system_state();
- vp9_short_fdct32x32(x->plane[1].src_diff, x->plane[1].coeff, 64);
- vp9_short_fdct32x32(x->plane[2].src_diff, x->plane[2].coeff, 64);
-}
-
-void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2;
- const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
- const int stride = 16 << (bwl - 1);
- int n;
-
- vp9_clear_system_state();
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
-
- x->fwd_txm16x16(x->plane[1].src_diff + y_idx * stride * 16 + x_idx * 16,
- x->plane[1].coeff + n * 256, stride * 2);
- x->fwd_txm16x16(x->plane[2].src_diff + y_idx * stride * 16 + x_idx * 16,
- x->plane[2].coeff + n * 256, stride * 2);
- }
-}
-
-void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1;
- const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
- const int stride = 8 << (bwl - 1);
- int n;
-
- vp9_clear_system_state();
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
-
- x->fwd_txm8x8(x->plane[1].src_diff + y_idx * stride * 8 + x_idx * 8,
- x->plane[1].coeff + n * 64, stride * 2);
- x->fwd_txm8x8(x->plane[2].src_diff + y_idx * stride * 8 + x_idx * 8,
- x->plane[2].coeff + n * 64, stride * 2);
- }
-}
-
-void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
- const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
- const int stride = 4 << (bwl - 1);
- int n;
-
- vp9_clear_system_state();
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
-
- x->fwd_txm4x4(x->plane[1].src_diff + y_idx * stride * 4 + x_idx * 4,
- x->plane[1].coeff + n * 16, stride * 2);
- x->fwd_txm4x4(x->plane[2].src_diff + y_idx * stride * 4 + x_idx * 4,
- x->plane[2].coeff + n * 16, stride * 2);
- }
-}
-
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
#define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
typedef struct vp9_token_state vp9_token_state;
@@ -249,57 +112,53 @@ static int trellis_get_coeff_context(const int *scan,
return pt;
}
-static void optimize_b(VP9_COMMON *const cm,
- MACROBLOCK *mb, int ib, PLANE_TYPE type,
- const int16_t *dequant_ptr,
+static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
+ int plane, int block, BLOCK_SIZE_TYPE bsize,
ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
- int tx_size, int y_blocks) {
+ TX_SIZE tx_size) {
const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME;
MACROBLOCKD *const xd = &mb->e_mbd;
vp9_token_state tokens[1025][2];
unsigned best_index[1025][2];
- const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
- const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff,
- pb_idx.block, 16);
+ const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff,
+ block, 16);
int16_t *qcoeff_ptr;
int16_t *dqcoeff_ptr;
- int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block], final_eob, sz = 0;
+ int eob = xd->plane[plane].eobs[block], final_eob, sz = 0;
const int i0 = 0;
int rc, x, next, i;
int64_t rdmult, rddiv, rd_cost0, rd_cost1;
int rate0, rate1, error0, error1, t0, t1;
int best, band, pt;
+ PLANE_TYPE type = xd->plane[plane].plane_type;
int err_mult = plane_rd_mult[type];
int default_eob, pad;
int const *scan, *nb;
const int mul = 1 + (tx_size == TX_32X32);
uint8_t token_cache[1024];
+ const int ib = txfrm_block_to_raster_block(xd, bsize, plane,
+ block, 2 * tx_size);
+ const int16_t *dequant_ptr = xd->plane[plane].dequant;
- assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
- dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16);
- qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16);
+ assert((!type && !plane) || (type && plane));
+ dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16);
+ qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
switch (tx_size) {
default:
case TX_4X4: {
- const TX_TYPE tx_type = get_tx_type_4x4(xd, ib);
+ const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT;
default_eob = 16;
scan = get_scan_4x4(tx_type);
break;
}
case TX_8X8: {
- const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
- const int sz = 1 + b_width_log2(sb_type);
- const int x = ib & ((1 << sz) - 1), y = ib - x;
- const TX_TYPE tx_type = get_tx_type_8x8(xd, y + (x >> 1));
+ const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT;
scan = get_scan_8x8(tx_type);
default_eob = 64;
break;
}
case TX_16X16: {
- const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
- const int sz = 2 + b_width_log2(sb_type);
- const int x = ib & ((1 << sz) - 1), y = ib - x;
- const TX_TYPE tx_type = get_tx_type_16x16(xd, y + (x >> 2));
+ const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT;
scan = get_scan_16x16(tx_type);
default_eob = 256;
break;
@@ -480,272 +339,268 @@ static void optimize_b(VP9_COMMON *const cm,
}
final_eob++;
- xd->plane[pb_idx.plane].eobs[pb_idx.block] = final_eob;
+ xd->plane[plane].eobs[block] = final_eob;
*a = *l = (final_eob > 0);
}
-void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- ENTROPY_CONTEXT *a = xd->plane[0].above_context;
- ENTROPY_CONTEXT *l = xd->plane[0].left_context;
- const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 3);
- ENTROPY_CONTEXT ta[2], tl[2];
- int n;
-
- for (n = 0; n < bw; n++, a += 8)
- ta[n] = (a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7]) != 0;
- for (n = 0; n < bh; n++, l += 8)
- tl[n] = (l[0] + l[1] + l[2] + l[3] + l[4] + l[5] + l[6] + l[7]) != 0;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
-
- optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
- ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh);
- }
+struct optimize_block_args {
+ VP9_COMMON *cm;
+ MACROBLOCK *x;
+ struct optimize_ctx *ctx;
+};
+
+void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
+ int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb,
+ struct optimize_ctx *ctx) {
+ MACROBLOCKD* const xd = &mb->e_mbd;
+ int x, y;
+
+ // find current entropy context
+ txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y);
+
+ optimize_b(cm, mb, plane, block, bsize,
+ &ctx->ta[plane][x], &ctx->tl[plane][y],
+ ss_txfrm_size / 2);
}
-void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- ENTROPY_CONTEXT *a = xd->plane[0].above_context;
- ENTROPY_CONTEXT *l = xd->plane[0].left_context;
- const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 2);
- ENTROPY_CONTEXT ta[4], tl[4];
- int n;
-
- for (n = 0; n < bw; n++, a += 4)
- ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0;
- for (n = 0; n < bh; n++, l += 4)
- tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
-
- optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
- ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh);
- }
+static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
+ int ss_txfrm_size, void *arg) {
+ const struct optimize_block_args* const args = arg;
+ vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, args->x,
+ args->ctx);
}
-void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- ENTROPY_CONTEXT *a = xd->plane[0].above_context;
- ENTROPY_CONTEXT *l = xd->plane[0].left_context;
- const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 1);
- ENTROPY_CONTEXT ta[8], tl[8];
- int n;
-
- for (n = 0; n < bw; n++, a += 2)
- ta[n] = (a[0] + a[1]) != 0;
- for (n = 0; n < bh; n++, l += 2)
- tl[n] = (l[0] + l[1]) != 0;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
-
- optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
- ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh);
+void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
+ struct optimize_ctx *ctx) {
+ int p;
+
+ for (p = 0; p < MAX_MB_PLANE; p++) {
+ const struct macroblockd_plane* const plane = &xd->plane[p];
+ const int bwl = b_width_log2(bsize) - plane->subsampling_x;
+ const int bhl = b_height_log2(bsize) - plane->subsampling_y;
+ const TX_SIZE tx_size = tx_size_for_plane(xd, bsize, p);
+ int i, j;
+
+ for (i = 0; i < 1 << bwl; i += 1 << tx_size) {
+ int c = 0;
+ ctx->ta[p][i] = 0;
+ for (j = 0; j < 1 << tx_size && !c; j++) {
+ c = ctx->ta[p][i] |= plane->above_context[i + j];
+ }
+ }
+ for (i = 0; i < 1 << bhl; i += 1 << tx_size) {
+ int c = 0;
+ ctx->tl[p][i] = 0;
+ for (j = 0; j < 1 << tx_size && !c; j++) {
+ c = ctx->tl[p][i] |= plane->left_context[i + j];
+ }
+ }
}
}
-void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- int bwl = b_width_log2(bsize), bw = 1 << bwl;
- int bh = 1 << b_height_log2(bsize);
- ENTROPY_CONTEXT ta[16], tl[16];
- int n;
+void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ struct optimize_ctx ctx;
+ struct optimize_block_args arg = {cm, x, &ctx};
+ vp9_optimize_init(&x->e_mbd, bsize, &ctx);
+ foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0,
+ optimize_block, &arg);
+}
- vpx_memcpy(ta, xd->plane[0].above_context, sizeof(ENTROPY_CONTEXT) * bw);
- vpx_memcpy(tl, xd->plane[0].left_context, sizeof(ENTROPY_CONTEXT) * bh);
+void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ struct optimize_ctx ctx;
+ struct optimize_block_args arg = {cm, x, &ctx};
+ vp9_optimize_init(&x->e_mbd, bsize, &ctx);
+ foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg);
+}
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+struct encode_b_args {
+ VP9_COMMON *cm;
+ MACROBLOCK *x;
+ struct optimize_ctx *ctx;
+};
- optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant,
- ta + x_idx, tl + y_idx, TX_4X4, bh * bw);
+static void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize,
+ int ss_txfrm_size, void *arg) {
+ struct encode_b_args* const args = arg;
+ MACROBLOCK* const x = args->x;
+ MACROBLOCKD* const xd = &x->e_mbd;
+ const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
+ const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
+ block, ss_txfrm_size);
+ int16_t* const src_diff = raster_block_offset_int16(xd, bsize, plane,
+ raster_block,
+ x->plane[plane].src_diff);
+ TX_TYPE tx_type = DCT_DCT;
+
+ switch (ss_txfrm_size / 2) {
+ case TX_32X32:
+ vp9_short_fdct32x32(src_diff,
+ BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
+ bw * 2);
+ break;
+ case TX_16X16:
+ tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT;
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht16x16(src_diff,
+ BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
+ bw, tx_type);
+ } else {
+ x->fwd_txm16x16(src_diff,
+ BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
+ bw * 2);
+ }
+ break;
+ case TX_8X8:
+ tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT;
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht8x8(src_diff,
+ BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
+ bw, tx_type);
+ } else {
+ x->fwd_txm8x8(src_diff,
+ BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
+ bw * 2);
+ }
+ break;
+ case TX_4X4:
+ tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
+ if (tx_type != DCT_DCT) {
+ vp9_short_fht4x4(src_diff,
+ BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
+ bw, tx_type);
+ } else {
+ x->fwd_txm4x4(src_diff,
+ BLOCK_OFFSET(x->plane[plane].coeff, block, 16),
+ bw * 2);
+ }
+ break;
+ default:
+ assert(0);
}
-}
-void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- int b;
-
- assert(bsize == BLOCK_SIZE_SB64X64);
- for (b = 256; b < 384; b += 64) {
- const int plane = 1 + (b >= 320);
- ENTROPY_CONTEXT *a = xd->plane[plane].above_context;
- ENTROPY_CONTEXT *l = xd->plane[plane].left_context;
- ENTROPY_CONTEXT a_ec, l_ec;
-
- a_ec = (a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7]) != 0;
- l_ec = (l[0] + l[1] + l[2] + l[3] + l[4] + l[5] + l[6] + l[7]) != 0;
- optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.plane[plane].dequant,
- &a_ec, &l_ec, TX_32X32, 256);
- }
+ vp9_quantize(x, plane, block, 16 << ss_txfrm_size, tx_type);
}
-void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2;
- const int bw = 1 << (bwl - 1);
- const int bh = 1 << (bhl - 1);
- int uvoff = 16 << (bwl + bhl);
- int plane, n;
-
- for (plane = 1; plane < MAX_MB_PLANE; plane++) {
- ENTROPY_CONTEXT ta[2], *a = xd->plane[plane].above_context;
- ENTROPY_CONTEXT tl[2], *l = xd->plane[plane].left_context;
-
- for (n = 0; n < bw; n++, a += 4)
- ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0;
- for (n = 0; n < bh; n++, l += 4)
- tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
- optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV,
- x->e_mbd.plane[plane].dequant,
- &ta[x_idx], &tl[y_idx],
- TX_16X16, bh * bw * 64);
- }
- uvoff = (uvoff * 5) >> 2; // switch u -> v
+static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
+ int ss_txfrm_size, void *arg) {
+ struct encode_b_args* const args = arg;
+ MACROBLOCK* const x = args->x;
+ MACROBLOCKD* const xd = &x->e_mbd;
+ const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x);
+ const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
+ block, ss_txfrm_size);
+ int16_t* const diff = raster_block_offset_int16(xd, bsize, plane,
+ raster_block,
+ xd->plane[plane].diff);
+ TX_TYPE tx_type = DCT_DCT;
+
+ xform_quant(plane, block, bsize, ss_txfrm_size, arg);
+
+ if (x->optimize)
+ vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx);
+
+ switch (ss_txfrm_size / 2) {
+ case TX_32X32:
+ vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
+ diff, bw * 2);
+ break;
+ case TX_16X16:
+ tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT;
+ if (tx_type == DCT_DCT) {
+ vp9_short_idct16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
+ diff, bw * 2);
+ } else {
+ vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
+ diff, bw, tx_type);
+ }
+ break;
+ case TX_8X8:
+ tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT;
+ if (tx_type == DCT_DCT) {
+ vp9_short_idct8x8(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
+ diff, bw * 2);
+ } else {
+ vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
+ diff, bw, tx_type);
+ }
+ break;
+ case TX_4X4:
+ tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
+ if (tx_type == DCT_DCT) {
+ // this is like vp9_short_idct4x4 but has a special case around eob<=1
+ // which is significant (not just an optimization) for the lossless
+ // case.
+ vp9_inverse_transform_b_4x4(xd, xd->plane[plane].eobs[block],
+ BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), diff, bw * 2);
+ } else {
+ vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
+ diff, bw, tx_type);
+ }
+ break;
}
}
-void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1;
- const int bw = 1 << (bwl - 1);
- const int bh = 1 << (bhl - 1);
- int uvoff = 4 << (bwl + bhl);
- int plane, n;
-
- for (plane = 1; plane < MAX_MB_PLANE; plane++) {
- ENTROPY_CONTEXT ta[4], *a = xd->plane[plane].above_context;
- ENTROPY_CONTEXT tl[4], *l = xd->plane[plane].left_context;
-
- for (n = 0; n < bw; n++, a += 2)
- ta[n] = (a[0] + a[1]) != 0;
- for (n = 0; n < bh; n++, l += 2)
- tl[n] = (l[0] + l[1]) != 0;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
- optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV,
- x->e_mbd.plane[plane].dequant,
- &ta[x_idx], &tl[y_idx],
- TX_8X8, bh * bw * 16);
- }
- uvoff = (uvoff * 5) >> 2; // switch u -> v
- }
+void vp9_xform_quant_sby(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ MACROBLOCKD* const xd = &x->e_mbd;
+ struct encode_b_args arg = {cm, x, NULL};
+
+ foreach_transformed_block_in_plane(xd, bsize, 0,
+ xform_quant, &arg);
}
-void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
- const int bw = 1 << (bwl - 1);
- const int bh = 1 << (bhl - 1);
- int uvoff = 1 << (bwl + bhl);
- int plane, n;
-
- for (plane = 1; plane < MAX_MB_PLANE; plane++) {
- ENTROPY_CONTEXT ta[8], tl[8];
-
- vpx_memcpy(ta, xd->plane[plane].above_context,
- sizeof(ENTROPY_CONTEXT) * bw);
- vpx_memcpy(tl, xd->plane[plane].left_context,
- sizeof(ENTROPY_CONTEXT) * bh);
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
- optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV,
- x->e_mbd.plane[plane].dequant,
- &ta[x_idx], &tl[y_idx],
- TX_4X4, bh * bw * 4);
- }
- uvoff = (uvoff * 5) >> 2; // switch u -> v
- }
+void vp9_xform_quant_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ MACROBLOCKD* const xd = &x->e_mbd;
+ struct encode_b_args arg = {cm, x, NULL};
+
+ foreach_transformed_block_uv(xd, bsize, xform_quant, &arg);
}
-void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) {
- MACROBLOCKD *const xd = &x->e_mbd;
- const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size;
-
- if (tx_size == TX_16X16) {
- vp9_transform_sby_16x16(x, BLOCK_SIZE_MB16X16);
- vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
- if (x->optimize) {
- vp9_optimize_sby_16x16(cm, x, BLOCK_SIZE_MB16X16);
- vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16);
- }
- vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16);
- } else if (tx_size == TX_8X8) {
- vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16);
- if (x->optimize)
- vp9_optimize_sby_8x8(cm, x, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16);
- if (xd->mode_info_context->mbmi.mode == SPLITMV) {
- assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4);
- vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
- if (x->optimize)
- vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16);
- } else {
- vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16);
- if (x->optimize)
- vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16);
- }
- } else {
- vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16);
- vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16);
- if (x->optimize) {
- vp9_optimize_sby_4x4(cm, x, BLOCK_SIZE_MB16X16);
- vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16);
- }
- vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16);
- }
+void vp9_encode_sby(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ MACROBLOCKD* const xd = &x->e_mbd;
+ struct optimize_ctx ctx;
+ struct encode_b_args arg = {cm, x, &ctx};
+
+ vp9_subtract_sby(x, bsize);
+ if (x->optimize)
+ vp9_optimize_init(xd, bsize, &ctx);
+
+ foreach_transformed_block_in_plane(xd, bsize, 0,
+ encode_block, &arg);
+
+ vp9_recon_sby(xd, bsize);
}
-void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- int mi_row, int mi_col) {
- MACROBLOCKD *const xd = &x->e_mbd;
+void vp9_encode_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ MACROBLOCKD* const xd = &x->e_mbd;
+ struct optimize_ctx ctx;
+ struct encode_b_args arg = {cm, x, &ctx};
+
+ vp9_subtract_sbuv(x, bsize);
+ if (x->optimize)
+ vp9_optimize_init(xd, bsize, &ctx);
+
+ foreach_transformed_block_uv(xd, bsize, encode_block, &arg);
- vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16);
- vp9_subtract_sb(x, BLOCK_SIZE_MB16X16);
- vp9_fidct_mb(cm, x);
- vp9_recon_sb(xd, BLOCK_SIZE_MB16X16);
+ vp9_recon_sbuv(xd, bsize);
}
-/* this function is used by first pass only */
-void vp9_encode_inter16x16y(MACROBLOCK *x, int mi_row, int mi_col) {
- MACROBLOCKD *xd = &x->e_mbd;
+void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize) {
+ MACROBLOCKD* const xd = &x->e_mbd;
+ struct optimize_ctx ctx;
+ struct encode_b_args arg = {cm, x, &ctx};
- vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16);
- vp9_subtract_sby(x, BLOCK_SIZE_MB16X16);
+ vp9_subtract_sb(x, bsize);
+ if (x->optimize)
+ vp9_optimize_init(xd, bsize, &ctx);
- vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16);
- vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16);
- vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16);
+ foreach_transformed_block(xd, bsize, encode_block, &arg);
- vp9_recon_sby(xd, BLOCK_SIZE_MB16X16);
+ vp9_recon_sb(xd, bsize);
}
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index da134a86b..afbe4466b 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -22,39 +22,29 @@ typedef struct {
MV_REFERENCE_FRAME second_ref_frame;
} MODE_DEFINITION;
-
-struct VP9_ENCODER_RTCD;
-void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- int mb_row, int mb_col);
-
-void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col);
-
-void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize);
-void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize);
-void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
+struct optimize_ctx {
+ ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
+ ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
+};
+void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize,
+ struct optimize_ctx *ctx);
+void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
+ int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *x,
+ struct optimize_ctx *ctx);
+void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize);
+void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize);
+
+void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
+void vp9_encode_sby(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
+void vp9_encode_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize);
+
+void vp9_xform_quant_sby(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize);
+void vp9_xform_quant_sbuv(VP9_COMMON *const cm, MACROBLOCK *x,
BLOCK_SIZE_TYPE bsize);
-void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize);
-void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize);
-void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize);
-void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize);
-void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize);
-
-void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x);
void vp9_subtract_block(int rows, int cols,
int16_t *diff_ptr, int diff_stride,
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index e4d68630d..a1898af48 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -14,7 +14,6 @@
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_variance.h"
#include "vp9/encoder/vp9_encodeintra.h"
-#include "vp9/common/vp9_setupintrarecon.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vpx_scale/vpx_scale.h"
@@ -247,8 +246,8 @@ static void avg_stats(FIRSTPASS_STATS *section) {
// Calculate a modified Error used in distributing bits between easier and harder frames
static double calculate_modified_err(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
- double av_err = (cpi->twopass.total_stats->ssim_weighted_pred_err /
- cpi->twopass.total_stats->count);
+ double av_err = (cpi->twopass.total_stats.ssim_weighted_pred_err /
+ cpi->twopass.total_stats.count);
double this_err = this_frame->ssim_weighted_pred_err;
double modified_err;
@@ -328,7 +327,7 @@ static int frame_max_bits(VP9_COMP *cpi) {
// For VBR base this on the bits and frames left plus the
// two_pass_vbrmax_section rate passed in by the user.
max_bits = (int) (((double) cpi->twopass.bits_left
- / (cpi->twopass.total_stats->count - (double) cpi->common
+ / (cpi->twopass.total_stats.count - (double) cpi->common
.current_video_frame))
* ((double) cpi->oxcf.two_pass_vbrmax_section / 100.0));
@@ -340,11 +339,11 @@ static int frame_max_bits(VP9_COMP *cpi) {
}
void vp9_init_first_pass(VP9_COMP *cpi) {
- zero_stats(cpi->twopass.total_stats);
+ zero_stats(&cpi->twopass.total_stats);
}
void vp9_end_first_pass(VP9_COMP *cpi) {
- output_stats(cpi, cpi->output_pkt_list, cpi->twopass.total_stats);
+ output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.total_stats);
}
static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset) {
@@ -486,8 +485,6 @@ void vp9_first_pass(VP9_COMP *cpi) {
vp9_setup_block_dptrs(&x->e_mbd);
- // set up frame new frame for intra coded blocks
- vp9_setup_intra_recon(new_yv12);
vp9_frame_init_quantizer(cpi);
// Initialise the MV cost table to the defaults
@@ -521,9 +518,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
set_mi_row_col(cm, xd,
- mb_row << CONFIG_SB8X8,
+ mb_row << 1,
1 << mi_height_log2(BLOCK_SIZE_MB16X16),
- mb_col << CONFIG_SB8X8,
+ mb_col << 1,
1 << mi_height_log2(BLOCK_SIZE_MB16X16));
xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
@@ -626,7 +623,10 @@ void vp9_first_pass(VP9_COMP *cpi) {
this_error = motion_error;
vp9_set_mbmode_and_mvs(x, NEWMV, &mv);
xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- vp9_encode_inter16x16y(x, mb_row, mb_col);
+ vp9_build_inter_predictors_sby(xd, mb_row << 1,
+ mb_col << 1,
+ BLOCK_SIZE_MB16X16);
+ vp9_encode_sb(cm, x, BLOCK_SIZE_MB16X16);
sum_mvr += mv.as_mv.row;
sum_mvr_abs += abs(mv.as_mv.row);
sum_mvc += mv.as_mv.col;
@@ -751,20 +751,20 @@ void vp9_first_pass(VP9_COMP *cpi) {
- cpi->source->ts_start);
// don't want to do output stats with a stack variable!
- memcpy(cpi->twopass.this_frame_stats,
+ memcpy(&cpi->twopass.this_frame_stats,
&fps,
sizeof(FIRSTPASS_STATS));
- output_stats(cpi, cpi->output_pkt_list, cpi->twopass.this_frame_stats);
- accumulate_stats(cpi->twopass.total_stats, &fps);
+ output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.this_frame_stats);
+ accumulate_stats(&cpi->twopass.total_stats, &fps);
}
// Copy the previous Last Frame back into gf and and arf buffers if
// the prediction is good enough... but also dont allow it to lag too far
if ((cpi->twopass.sr_update_lag > 3) ||
((cm->current_video_frame > 0) &&
- (cpi->twopass.this_frame_stats->pcnt_inter > 0.20) &&
- ((cpi->twopass.this_frame_stats->intra_error /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats->coded_error)) >
+ (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) &&
+ ((cpi->twopass.this_frame_stats.intra_error /
+ DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats.coded_error)) >
2.0))) {
vp8_yv12_copy_frame(lst_yv12, gld_yv12);
cpi->twopass.sr_update_lag = 1;
@@ -995,7 +995,7 @@ static int estimate_max_q(VP9_COMP *cpi,
// Give average a chance to settle though.
// PGW TODO.. This code is broken for the extended Q range
if ((cpi->ni_frames >
- ((int)cpi->twopass.total_stats->count >> 8)) &&
+ ((int)cpi->twopass.total_stats.count >> 8)) &&
(cpi->ni_frames > 25)) {
adjust_maxq_qrange(cpi);
}
@@ -1052,8 +1052,8 @@ static int estimate_cq(VP9_COMP *cpi,
}
// II ratio correction factor for clip as a whole
- clip_iiratio = cpi->twopass.total_stats->intra_error /
- DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats->coded_error);
+ clip_iiratio = cpi->twopass.total_stats.intra_error /
+ DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error);
clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025);
if (clip_iifactor < 0.80)
clip_iifactor = 0.80;
@@ -1098,14 +1098,14 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
if (two_pass_min_rate < lower_bounds_min_rate)
two_pass_min_rate = lower_bounds_min_rate;
- zero_stats(cpi->twopass.total_stats);
- zero_stats(cpi->twopass.total_left_stats);
+ zero_stats(&cpi->twopass.total_stats);
+ zero_stats(&cpi->twopass.total_left_stats);
if (!cpi->twopass.stats_in_end)
return;
- *cpi->twopass.total_stats = *cpi->twopass.stats_in_end;
- *cpi->twopass.total_left_stats = *cpi->twopass.total_stats;
+ cpi->twopass.total_stats = *cpi->twopass.stats_in_end;
+ cpi->twopass.total_left_stats = cpi->twopass.total_stats;
// each frame can have a different duration, as the frame rate in the source
// isn't guaranteed to be constant. The frame rate prior to the first frame
@@ -1113,13 +1113,13 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
// Its calculated based on the actual durations of all frames from the first
// pass.
vp9_new_frame_rate(cpi,
- 10000000.0 * cpi->twopass.total_stats->count /
- cpi->twopass.total_stats->duration);
+ 10000000.0 * cpi->twopass.total_stats.count /
+ cpi->twopass.total_stats.duration);
cpi->output_frame_rate = cpi->oxcf.frame_rate;
- cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats->duration *
+ cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration *
cpi->oxcf.target_bandwidth / 10000000.0);
- cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats->duration *
+ cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration *
two_pass_min_rate / 10000000.0);
// Calculate a minimum intra value to be used in determining the IIratio
@@ -1145,7 +1145,8 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
sum_iiratio += IIRatio;
}
- cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats->count);
+ cpi->twopass.avg_iiratio = sum_iiratio /
+ DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count);
// Reset file position
reset_fpf_position(cpi, start_pos);
@@ -1828,7 +1829,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left.
// This is also important for short clips where there may only be one
// key frame.
- if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats->count -
+ if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count -
cpi->common.current_video_frame)) {
cpi->twopass.kf_group_bits =
(cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0;
@@ -2096,7 +2097,7 @@ static int adjust_active_maxq(int old_maxqi, int new_maxqi) {
void vp9_second_pass(VP9_COMP *cpi) {
int tmp_q;
- int frames_left = (int)(cpi->twopass.total_stats->count -
+ int frames_left = (int)(cpi->twopass.total_stats.count -
cpi->common.current_video_frame);
FIRSTPASS_STATS this_frame;
@@ -2121,7 +2122,7 @@ void vp9_second_pass(VP9_COMP *cpi) {
est_cq =
estimate_cq(cpi,
- cpi->twopass.total_left_stats,
+ &cpi->twopass.total_left_stats,
(int)(cpi->twopass.bits_left / frames_left));
cpi->cq_target_quality = cpi->oxcf.cq_level;
@@ -2135,7 +2136,7 @@ void vp9_second_pass(VP9_COMP *cpi) {
tmp_q = estimate_max_q(
cpi,
- cpi->twopass.total_left_stats,
+ &cpi->twopass.total_left_stats,
(int)(cpi->twopass.bits_left / frames_left));
cpi->active_worst_quality = tmp_q;
@@ -2158,15 +2159,15 @@ void vp9_second_pass(VP9_COMP *cpi) {
// radical adjustments to the allowed quantizer range just to use up a
// few surplus bits or get beneath the target rate.
else if ((cpi->common.current_video_frame <
- (((unsigned int)cpi->twopass.total_stats->count * 255) >> 8)) &&
+ (((unsigned int)cpi->twopass.total_stats.count * 255) >> 8)) &&
((cpi->common.current_video_frame + cpi->baseline_gf_interval) <
- (unsigned int)cpi->twopass.total_stats->count)) {
+ (unsigned int)cpi->twopass.total_stats.count)) {
if (frames_left < 1)
frames_left = 1;
tmp_q = estimate_max_q(
cpi,
- cpi->twopass.total_left_stats,
+ &cpi->twopass.total_left_stats,
(int)(cpi->twopass.bits_left / frames_left));
// Make a damped adjustment to active max Q
@@ -2245,7 +2246,7 @@ void vp9_second_pass(VP9_COMP *cpi) {
cpi->twopass.frames_to_key--;
// Update the total stats remaining structure
- subtract_stats(cpi->twopass.total_left_stats, &this_frame);
+ subtract_stats(&cpi->twopass.total_left_stats, &this_frame);
}
static int test_candidate_kf(VP9_COMP *cpi,
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index fe5d114ba..018c86cb9 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -11,7 +11,6 @@
#include <limits.h>
#include <vp9/encoder/vp9_encodeintra.h>
#include <vp9/encoder/vp9_rdopt.h>
-#include <vp9/common/vp9_setupintrarecon.h>
#include <vp9/common/vp9_blockd.h>
#include <vp9/common/vp9_reconinter.h>
#include <vp9/common/vp9_systemdependent.h>
@@ -386,7 +385,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
// goes in segment 0
if (arf_not_zz[offset + mb_col]) {
ncnt[0]++;
-#if CONFIG_SB8X8
cpi->segmentation_map[offset * 4 + 2 * mb_col] = 0;
cpi->segmentation_map[offset * 4 + 2 * mb_col + 1] = 0;
cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols] = 0;
@@ -396,11 +394,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
cpi->segmentation_map[offset * 4 + 2 * mb_col + 1] = 1;
cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols] = 1;
cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols + 1] = 1;
-#else
- cpi->segmentation_map[offset + mb_col] = 0;
- } else {
- cpi->segmentation_map[offset + mb_col] = 1;
-#endif
ncnt[1]++;
}
}
@@ -419,10 +412,10 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
cpi->static_mb_pct = 0;
cpi->seg0_cnt = ncnt[0];
- vp9_enable_segmentation((VP9_PTR) cpi);
+ vp9_enable_segmentation((VP9_PTR)cpi);
} else {
cpi->static_mb_pct = 0;
- vp9_disable_segmentation((VP9_PTR) cpi);
+ vp9_disable_segmentation((VP9_PTR)cpi);
}
// Free localy allocated storage
diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c
index 7d9462f94..e26daf0c9 100644
--- a/vp9/encoder/vp9_modecosts.c
+++ b/vp9/encoder/vp9_modecosts.c
@@ -41,8 +41,6 @@ void vp9_init_mode_costs(VP9_COMP *c) {
x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
x->kf_uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree);
- vp9_cost_tokens(c->mb.i8x8_mode_costs,
- x->fc.i8x8_mode_prob, vp9_i8x8_mode_tree);
for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i)
vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i],
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 0af232eed..ffee34eb7 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -280,8 +280,7 @@ static void setup_features(VP9_COMP *cpi) {
MACROBLOCKD *xd = &cpi->mb.e_mbd;
// Set up default state for MB feature flags
-
- xd->segmentation_enabled = 0; // Default segmentation disabled
+ xd->segmentation_enabled = 0;
xd->update_mb_segmentation_map = 0;
xd->update_mb_segmentation_data = 0;
@@ -333,15 +332,6 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->mb.pip);
cpi->mb.pip = 0;
-
- vpx_free(cpi->twopass.total_stats);
- cpi->twopass.total_stats = 0;
-
- vpx_free(cpi->twopass.total_left_stats);
- cpi->twopass.total_left_stats = 0;
-
- vpx_free(cpi->twopass.this_frame_stats);
- cpi->twopass.this_frame_stats = 0;
}
// Computes a q delta (in "q index" terms) to get from a starting q value
@@ -383,7 +373,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
xd->update_mb_segmentation_map = 0;
xd->update_mb_segmentation_data = 0;
#if CONFIG_IMPLICIT_SEGMENTATION
- xd->allow_implicit_segment_update = 0;
+ xd->allow_implicit_segment_update = 0;
#endif
cpi->static_mb_pct = 0;
@@ -399,7 +389,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
xd->update_mb_segmentation_map = 0;
xd->update_mb_segmentation_data = 0;
#if CONFIG_IMPLICIT_SEGMENTATION
- xd->allow_implicit_segment_update = 0;
+ xd->allow_implicit_segment_update = 0;
#endif
cpi->static_mb_pct = 0;
@@ -428,9 +418,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
xd->mb_segment_abs_delta = SEGMENT_DELTADATA;
}
- }
- // All other frames if segmentation has been enabled
- else if (xd->segmentation_enabled) {
+ } else if (xd->segmentation_enabled) {
+ // All other frames if segmentation has been enabled
+
// First normal frame in a valid gf or alt ref group
if (cpi->common.frames_since_golden == 0) {
// Set up segment features for normal frames in an arf group
@@ -454,10 +444,10 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
vp9_enable_segfeature(xd, 1, SEG_LVL_SKIP);
}
- }
- // Disable segmentation and clear down features if alt ref
- // is not active for this group
- else {
+ } else {
+ // Disable segmentation and clear down features if alt ref
+ // is not active for this group
+
vp9_disable_segmentation((VP9_PTR)cpi);
vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols);
@@ -467,12 +457,11 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
vp9_clearall_segfeatures(xd);
}
- }
+ } else if (cpi->is_src_frame_alt_ref) {
+ // Special case where we are coding over the top of a previous
+ // alt ref frame.
+ // Segment coding disabled for compred testing
- // Special case where we are coding over the top of a previous
- // alt ref frame.
- // Segment coding disabled for compred testing
- else if (cpi->is_src_frame_alt_ref) {
// Enable ref frame features for segment 0 as well
vp9_enable_segfeature(xd, 0, SEG_LVL_REF_FRAME);
vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME);
@@ -490,9 +479,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) {
}
// Enable data udpate
xd->update_mb_segmentation_data = 1;
- }
- // All other frames.
- else {
+ } else {
+ // All other frames.
+
// No updates.. leave things as they are.
xd->update_mb_segmentation_map = 0;
xd->update_mb_segmentation_data = 0;
@@ -628,7 +617,6 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) {
sf->thresh_mult[THR_D63_PRED ] += speed_multiplier * 1500;
sf->thresh_mult[THR_B_PRED ] += speed_multiplier * 2500;
- sf->thresh_mult[THR_I8X8_PRED] += speed_multiplier * 2500;
sf->thresh_mult[THR_NEWMV ] += speed_multiplier * 1000;
sf->thresh_mult[THR_NEWG ] += speed_multiplier * 1000;
@@ -867,9 +855,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
}
cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4;
- cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair;
- cpi->mb.quantize_b_8x8 = vp9_regular_quantize_b_8x8;
- cpi->mb.quantize_b_16x16 = vp9_regular_quantize_b_16x16;
vp9_init_quantizer(cpi);
@@ -959,23 +944,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
CHECK_MEM_ERROR(cpi->mb_norm_activity_map,
vpx_calloc(sizeof(unsigned int),
cm->mb_rows * cm->mb_cols));
-
- vpx_free(cpi->twopass.total_stats);
-
- cpi->twopass.total_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
-
- vpx_free(cpi->twopass.total_left_stats);
- cpi->twopass.total_left_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
-
- vpx_free(cpi->twopass.this_frame_stats);
-
- cpi->twopass.this_frame_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS));
-
- if (!cpi->twopass.total_stats ||
- !cpi->twopass.total_left_stats ||
- !cpi->twopass.this_frame_stats)
- vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate firstpass stats");
}
@@ -1647,6 +1615,12 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
BFP(BLOCK_8X8, vp9_sad8x8, vp9_variance8x8, vp9_sub_pixel_variance8x8,
NULL, NULL, NULL, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d)
+ BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+
+ BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL)
+
BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4,
NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d)
@@ -3326,9 +3300,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count);
vp9_copy(cpi->common.fc.uv_mode_counts, cpi->y_uv_mode_count);
vp9_copy(cpi->common.fc.bmode_counts, cpi->bmode_count);
- vp9_copy(cpi->common.fc.i8x8_mode_counts, cpi->i8x8_mode_count);
vp9_copy(cpi->common.fc.sub_mv_ref_counts, cpi->sub_mv_ref_count);
- vp9_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count);
vp9_copy(cpi->common.fc.partition_counts, cpi->partition_count);
#if CONFIG_COMP_INTERINTRA_PRED
vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index aeaf1bda3..cc91ba5d2 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -48,9 +48,9 @@
#define KEY_FRAME_CONTEXT 5
#if CONFIG_COMP_INTERINTRA_PRED
-#define MAX_MODES 54
+#define MAX_MODES 53
#else
-#define MAX_MODES 42
+#define MAX_MODES 41
#endif
#define MIN_THRESHMULT 32
@@ -72,7 +72,6 @@ typedef struct {
// Stats
int y_modes[VP9_YMODES];
int uv_modes[VP9_UV_MODES];
- int i8x8_modes[VP9_I8X8_MODES];
int b_modes[B_MODE_COUNT];
int inter_y_modes[MB_MODE_COUNT];
int inter_uv_modes[VP9_UV_MODES];
@@ -100,9 +99,7 @@ typedef struct {
vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */
vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1];
vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1];
- vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1];
vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1];
- vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1];
vp9_prob partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1];
vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1]
@@ -207,7 +204,6 @@ typedef enum {
THR_SPLITA,
THR_B_PRED,
- THR_I8X8_PRED,
THR_COMP_ZEROLG,
THR_COMP_NEARESTLG,
@@ -273,10 +269,12 @@ typedef struct {
} SPEED_FEATURES;
enum BlockSize {
- BLOCK_16X8 = PARTITIONING_16X8,
- BLOCK_8X16 = PARTITIONING_8X16,
- BLOCK_8X8 = PARTITIONING_8X8,
- BLOCK_4X4 = PARTITIONING_4X4,
+ BLOCK_4X4,
+ BLOCK_4X8,
+ BLOCK_8X4,
+ BLOCK_8X8,
+ BLOCK_8X16,
+ BLOCK_16X8,
BLOCK_16X16,
BLOCK_MAX_SEGMENTS,
BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
@@ -451,9 +449,7 @@ typedef struct VP9_COMP {
int sb_ymode_count [VP9_I32X32_MODES];
int ymode_count[VP9_YMODES]; /* intra MB type cts this frame */
int bmode_count[VP9_NKF_BINTRAMODES];
- int i8x8_mode_count[VP9_I8X8_MODES];
int sub_mv_ref_count[SUBMVREF_COUNT][VP9_SUBMVREFS];
- int mbsplit_count[VP9_NUMMBSPLITS];
int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES];
unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES];
#if CONFIG_COMP_INTERINTRA_PRED
@@ -557,10 +553,10 @@ typedef struct VP9_COMP {
unsigned int section_intra_rating;
unsigned int next_iiratio;
unsigned int this_iiratio;
- FIRSTPASS_STATS *total_stats;
- FIRSTPASS_STATS *this_frame_stats;
+ FIRSTPASS_STATS total_stats;
+ FIRSTPASS_STATS this_frame_stats;
FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
- FIRSTPASS_STATS *total_left_stats;
+ FIRSTPASS_STATS total_left_stats;
int first_pass_done;
int64_t bits_left;
int64_t clip_bits_total;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 77e19721c..4ed8f6326 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -75,57 +75,52 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
*eob_ptr = eob + 1;
}
-void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
- int y_blocks) {
+void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coeffs,
+ TX_TYPE tx_type) {
MACROBLOCKD *const xd = &mb->e_mbd;
- const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
- const int *pt_scan = get_scan_4x4(tx_type);
+ const int mul = n_coeffs == 1024 ? 2 : 1;
+ const int *scan;
+
+ // These contexts may be available in the caller
+ switch (n_coeffs) {
+ case 4 * 4:
+ scan = get_scan_4x4(tx_type);
+ break;
+ case 8 * 8:
+ scan = get_scan_8x8(tx_type);
+ break;
+ case 16 * 16:
+ scan = get_scan_16x16(tx_type);
+ break;
+ default:
+ scan = vp9_default_zig_zag1d_32x32;
+ break;
+ }
- quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
- BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
- 16, mb->skip_block,
- mb->plane[pb_idx.plane].zbin,
- mb->plane[pb_idx.plane].round,
- mb->plane[pb_idx.plane].quant,
- mb->plane[pb_idx.plane].quant_shift,
- BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
- BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
- xd->plane[pb_idx.plane].dequant,
- mb->plane[pb_idx.plane].zbin_extra,
- &xd->plane[pb_idx.plane].eobs[pb_idx.block],
- pt_scan, 1);
+ quantize(mb->plane[plane].zrun_zbin_boost,
+ BLOCK_OFFSET(mb->plane[plane].coeff, block, 16),
+ n_coeffs, mb->skip_block,
+ mb->plane[plane].zbin,
+ mb->plane[plane].round,
+ mb->plane[plane].quant,
+ mb->plane[plane].quant_shift,
+ BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16),
+ BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
+ xd->plane[plane].dequant,
+ mb->plane[plane].zbin_extra,
+ &xd->plane[plane].eobs[block],
+ scan, mul);
}
-void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
+void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
int y_blocks) {
MACROBLOCKD *const xd = &mb->e_mbd;
const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
- const int *pt_scan = get_scan_8x8(tx_type);
-
- quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
- BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
- 64, mb->skip_block,
- mb->plane[pb_idx.plane].zbin,
- mb->plane[pb_idx.plane].round,
- mb->plane[pb_idx.plane].quant,
- mb->plane[pb_idx.plane].quant_shift,
- BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
- BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
- xd->plane[pb_idx.plane].dequant,
- mb->plane[pb_idx.plane].zbin_extra,
- &xd->plane[pb_idx.plane].eobs[pb_idx.block],
- pt_scan, 1);
-}
-
-void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
- int y_blocks) {
- MACROBLOCKD *const xd = &mb->e_mbd;
- const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
- const int *pt_scan = get_scan_16x16(tx_type);
+ const int *pt_scan = get_scan_4x4(tx_type);
quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
- 256, mb->skip_block,
+ 16, mb->skip_block,
mb->plane[pb_idx.plane].zbin,
mb->plane[pb_idx.plane].round,
mb->plane[pb_idx.plane].quant,
@@ -138,120 +133,6 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
pt_scan, 1);
}
-void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) {
- MACROBLOCKD *const xd = &mb->e_mbd;
- const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx);
-
- quantize(mb->plane[pb_idx.plane].zrun_zbin_boost,
- BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16),
- 1024, mb->skip_block,
- mb->plane[pb_idx.plane].zbin,
- mb->plane[pb_idx.plane].round,
- mb->plane[pb_idx.plane].quant,
- mb->plane[pb_idx.plane].quant_shift,
- BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
- BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
- xd->plane[pb_idx.plane].dequant,
- mb->plane[pb_idx.plane].zbin_extra,
- &xd->plane[pb_idx.plane].eobs[pb_idx.block],
- vp9_default_zig_zag1d_32x32, 2);
-}
-
-void vp9_quantize_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bw = 1 << (b_width_log2(bsize) - 3);
- const int bh = 1 << (b_height_log2(bsize) - 3);
- int n;
-
- for (n = 0; n < bw * bh; n++)
- vp9_regular_quantize_b_32x32(x, n * 64, bw * bh * 64);
-}
-
-void vp9_quantize_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 2);
- const int bstride = 16 << bwl;
- int n;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd,
- 4 * x_idx + y_idx * bstride);
- x->quantize_b_16x16(x, n * 16, tx_type, 16 * bw * bh);
- }
-}
-
-void vp9_quantize_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 1);
- const int bstride = 4 << bwl;
- int n;
-
- for (n = 0; n < bw * bh; n++) {
- const int x_idx = n & (bw - 1), y_idx = n >> bwl;
- TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd,
- 2 * x_idx + y_idx * bstride);
- x->quantize_b_8x8(x, n * 4, tx_type, 4 * bw * bh);
- }
-}
-
-void vp9_quantize_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize), bw = 1 << bwl;
- const int bh = 1 << b_height_log2(bsize);
- MACROBLOCKD *const xd = &x->e_mbd;
- int n;
-
- for (n = 0; n < bw * bh; n++) {
- const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
- x->quantize_b_4x4(x, n, tx_type, bw * bh);
- }
-}
-
-void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- assert(bsize == BLOCK_SIZE_SB64X64);
- vp9_regular_quantize_b_32x32(x, 256, 256);
- vp9_regular_quantize_b_32x32(x, 320, 256);
-}
-
-void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 2;
- const int bhl = b_height_log2(bsize) - 2;
- const int uoff = 16 << (bhl + bwl);
- int i;
-
- for (i = uoff; i < ((uoff * 3) >> 1); i += 16)
- x->quantize_b_16x16(x, i, DCT_DCT, uoff);
-}
-
-void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 2;
- const int bhl = b_height_log2(bsize) - 2;
- const int uoff = 16 << (bhl + bwl);
- int i;
-
- for (i = uoff; i < ((uoff * 3) >> 1); i += 4)
- x->quantize_b_8x8(x, i, DCT_DCT, uoff);
-}
-
-void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 2;
- const int bhl = b_height_log2(bsize) - 2;
- const int uoff = 16 << (bhl + bwl);
- int i;
-
- for (i = uoff; i < ((uoff * 3) >> 1); i++)
- x->quantize_b_4x4(x, i, DCT_DCT, uoff);
-}
-
-/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
- * these two C functions if corresponding optimized routine is not available.
- * NEON optimized version implements currently the fast quantization for pair
- * of blocks. */
-void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2,
- int y_blocks) {
- vp9_regular_quantize_b_4x4(x, b_idx1, DCT_DCT, y_blocks);
- vp9_regular_quantize_b_4x4(x, b_idx2, DCT_DCT, y_blocks);
-}
-
static void invert_quant(int16_t *quant, uint8_t *shift, int d) {
unsigned t;
int l;
@@ -266,6 +147,7 @@ static void invert_quant(int16_t *quant, uint8_t *shift, int d) {
void vp9_init_quantizer(VP9_COMP *cpi) {
int i;
int quant_val;
+ int quant_uv_val;
int q;
static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12,
@@ -293,52 +175,36 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
cpi->common.uv_dequant[q][0] = quant_val;
cpi->zrun_zbin_boost_uv[q][0] = (quant_val * zbin_boost[0]) >> 7;
+ quant_val = vp9_ac_quant(q, 0);
+ cpi->common.y_dequant[q][1] = quant_val;
+ quant_uv_val = vp9_ac_quant(q, cpi->common.uv_ac_delta_q);
+ cpi->common.uv_dequant[q][1] = quant_uv_val;
// all the 4x4 ac values =;
for (i = 1; i < 16; i++) {
int rc = vp9_default_zig_zag1d_4x4[i];
- quant_val = vp9_ac_quant(q, 0);
invert_quant(cpi->Y1quant[q] + rc, cpi->Y1quant_shift[q] + rc, quant_val);
cpi->Y1zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
cpi->Y1round[q][rc] = (qrounding_factor * quant_val) >> 7;
- cpi->common.y_dequant[q][rc] = quant_val;
cpi->zrun_zbin_boost_y1[q][i] =
ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7);
- quant_val = vp9_ac_quant(q, cpi->common.uv_ac_delta_q);
- invert_quant(cpi->UVquant[q] + rc, cpi->UVquant_shift[q] + rc, quant_val);
- cpi->UVzbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7);
- cpi->UVround[q][rc] = (qrounding_factor * quant_val) >> 7;
- cpi->common.uv_dequant[q][rc] = quant_val;
+ invert_quant(cpi->UVquant[q] + rc, cpi->UVquant_shift[q] + rc,
+ quant_uv_val);
+ cpi->UVzbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_uv_val, 7);
+ cpi->UVround[q][rc] = (qrounding_factor * quant_uv_val) >> 7;
cpi->zrun_zbin_boost_uv[q][i] =
- ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7);
+ ROUND_POWER_OF_TWO(quant_uv_val * zbin_boost[i], 7);
}
}
}
void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
int i;
- int qindex;
MACROBLOCKD *xd = &x->e_mbd;
int zbin_extra;
int segment_id = xd->mode_info_context->mbmi.segment_id;
-
- // Select the baseline MB Q index allowing for any segment level change.
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) {
- if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) {
- // Abs Value
- qindex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
- } else {
- // Delta Value
- qindex = cpi->common.base_qindex +
- vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q);
-
- // Clamp to valid range
- qindex = clamp(qindex, 0, MAXQ);
- }
- } else {
- qindex = cpi->common.base_qindex;
- }
+ const int qindex = vp9_get_qindex(xd, segment_id, cpi->common.base_qindex);
// Y
zbin_extra = (cpi->common.y_dequant[qindex][1] *
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index fd7a4bb4f..2b1eeabbe 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -22,9 +22,8 @@
#define prototype_quantize_mb(sym) \
void (sym)(MACROBLOCK *x)
-#if ARCH_X86 || ARCH_X86_64
-#include "x86/vp9_quantize_x86.h"
-#endif
+void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coefs,
+ TX_TYPE tx_type);
void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2,
int y_blocks);
@@ -32,20 +31,6 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
int y_blocks);
void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
int y_blocks);
-void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
- int y_blocks);
-void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx,
- int y_blocks);
-
-void vp9_quantize_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_quantize_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_quantize_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_quantize_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize);
-
struct VP9_COMP;
extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q);
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 47252253d..0f84b1a37 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -138,9 +138,7 @@ void vp9_save_coding_context(VP9_COMP *cpi) {
vp9_copy(cc->sb_ymode_prob, cm->fc.sb_ymode_prob);
vp9_copy(cc->bmode_prob, cm->fc.bmode_prob);
vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob);
- vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob);
vp9_copy(cc->sub_mv_ref_prob, cm->fc.sub_mv_ref_prob);
- vp9_copy(cc->mbsplit_prob, cm->fc.mbsplit_prob);
vp9_copy(cc->partition_prob, cm->fc.partition_prob);
// Stats
@@ -198,10 +196,8 @@ void vp9_restore_coding_context(VP9_COMP *cpi) {
vp9_copy(cm->fc.ymode_prob, cc->ymode_prob);
vp9_copy(cm->fc.sb_ymode_prob, cc->sb_ymode_prob);
vp9_copy(cm->fc.bmode_prob, cc->bmode_prob);
- vp9_copy(cm->fc.i8x8_mode_prob, cc->i8x8_mode_prob);
vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob);
vp9_copy(cm->fc.sub_mv_ref_prob, cc->sub_mv_ref_prob);
- vp9_copy(cm->fc.mbsplit_prob, cc->mbsplit_prob);
vp9_copy(cm->fc.partition_prob, cc->partition_prob);
// Stats
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 0e85a0c71..90d56b2d2 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -13,8 +13,8 @@
#include <math.h>
#include <limits.h>
#include <assert.h>
-#include "vp9/common/vp9_pragmas.h"
+#include "vp9/common/vp9_pragmas.h"
#include "vp9/encoder/vp9_tokenize.h"
#include "vp9/encoder/vp9_treewriter.h"
#include "vp9/encoder/vp9_onyx_int.h"
@@ -34,7 +34,6 @@
#include "vpx_mem/vpx_mem.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemv.h"
-
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_entropy.h"
@@ -42,8 +41,6 @@
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_common.h"
-#define MAXF(a,b) (((a) > (b)) ? (a) : (b))
-
#define INVALID_MV 0x80008000
/* Factor to weigh the rate for switchable interp filters */
@@ -105,7 +102,6 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{SPLITMV, ALTREF_FRAME, NONE},
{I4X4_PRED, INTRA_FRAME, NONE},
- {I8X8_PRED, INTRA_FRAME, NONE},
/* compound prediction modes */
{ZEROMV, LAST_FRAME, GOLDEN_FRAME},
@@ -155,11 +151,9 @@ static void fill_token_costs(vp9_coeff_count *c,
for (i = 0; i < BLOCK_TYPES; i++)
for (j = 0; j < REF_TYPES; j++)
for (k = 0; k < COEF_BANDS; k++)
- for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
- vp9_cost_tokens_skip((int *)(c[i][j][k][l]),
- p[i][j][k][l],
+ for (l = 0; l < PREV_COEF_CONTEXTS; l++)
+ vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l],
vp9_coef_tree);
- }
}
static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0,
@@ -182,7 +176,7 @@ void vp9_init_me_luts() {
for (i = 0; i < QINDEX_RANGE; i++) {
sad_per_bit16lut[i] =
(int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
- sad_per_bit4lut[i] = (int)((0.063 * vp9_convert_qindex_to_q(i)) + 2.742);
+ sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
}
}
@@ -206,7 +200,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
// for key frames, golden frames and arf frames.
// if (cpi->common.refresh_golden_frame ||
// cpi->common.refresh_alt_ref_frame)
- qindex = (qindex < 0) ? 0 : ((qindex > MAXQ) ? MAXQ : qindex);
+ qindex = clamp(qindex, 0, MAXQ);
cpi->RDMULT = compute_rd_mult(qindex);
if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
@@ -291,7 +285,7 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
}
static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
- int ib, PLANE_TYPE type,
+ int plane, int block, PLANE_TYPE type,
ENTROPY_CONTEXT *A,
ENTROPY_CONTEXT *L,
TX_SIZE tx_size,
@@ -302,10 +296,9 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
int c = 0;
int cost = 0, pad;
const int *scan, *nb;
- const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib);
- const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block];
- const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
- pb_idx.block, 16);
+ const int eob = xd->plane[plane].eobs[block];
+ const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
+ block, 16);
const int ref = mbmi->ref_frame != INTRA_FRAME;
unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
mb->token_costs[tx_size][type][ref];
@@ -332,7 +325,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
#endif
// Check for consistency of tx_size with mode info
- assert((!type && !pb_idx.plane) || (type && pb_idx.plane));
+ assert((!type && !plane) || (type && plane));
if (type == PLANE_TYPE_Y_WITH_DC) {
assert(xd->mode_info_context->mbmi.txfm_size == tx_size);
} else {
@@ -343,7 +336,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
switch (tx_size) {
case TX_4X4: {
tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type_4x4(xd, ib) : DCT_DCT;
+ get_tx_type_4x4(xd, block) : DCT_DCT;
above_ec = A[0] != 0;
left_ec = L[0] != 0;
coef_probs = cm->fc.coef_probs_4x4;
@@ -357,7 +350,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
case TX_8X8: {
const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
const int sz = 1 + b_width_log2(sb_type);
- const int x = ib & ((1 << sz) - 1), y = ib - x;
+ const int x = block & ((1 << sz) - 1), y = block - x;
TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
above_ec = (A[0] + A[1]) != 0;
@@ -373,7 +366,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
case TX_16X16: {
const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
const int sz = 2 + b_width_log2(sb_type);
- const int x = ib & ((1 << sz) - 1), y = ib - x;
+ const int x = block & ((1 << sz) - 1), y = block - x;
TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
scan = get_scan_16x16(tx_type);
@@ -563,17 +556,19 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1]))) {
mbmi->txfm_size = TX_32X32;
- } else if ( cm->txfm_mode == ALLOW_16X16 ||
- (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) ||
- (cm->txfm_mode == TX_MODE_SELECT &&
- rd[TX_16X16][1] < rd[TX_8X8][1] &&
- rd[TX_16X16][1] < rd[TX_4X4][1])) {
+ } else if (max_txfm_size >= TX_16X16 &&
+ (cm->txfm_mode == ALLOW_16X16 ||
+ cm->txfm_mode == ALLOW_32X32 ||
+ (cm->txfm_mode == TX_MODE_SELECT &&
+ rd[TX_16X16][1] < rd[TX_8X8][1] &&
+ rd[TX_16X16][1] < rd[TX_4X4][1]))) {
mbmi->txfm_size = TX_16X16;
} else if (cm->txfm_mode == ALLOW_8X8 ||
+ cm->txfm_mode == ALLOW_16X16 ||
+ cm->txfm_mode == ALLOW_32X32 ||
(cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
mbmi->txfm_size = TX_8X8;
} else {
- assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT);
mbmi->txfm_size = TX_4X4;
}
@@ -583,13 +578,14 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
txfm_cache[ONLY_4X4] = rd[TX_4X4][0];
txfm_cache[ALLOW_8X8] = rd[TX_8X8][0];
- txfm_cache[ALLOW_16X16] = rd[TX_16X16][0];
- txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0];
+ txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0];
+ txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0];
if (max_txfm_size == TX_32X32 &&
rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
rd[TX_32X32][1] < rd[TX_4X4][1])
txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
- else if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
+ else if (max_txfm_size >= TX_16X16 &&
+ rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
else
txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
@@ -603,16 +599,17 @@ static int block_error(int16_t *coeff, int16_t *dqcoeff,
for (i = 0; i < block_size; i++) {
int this_diff = coeff[i] - dqcoeff[i];
- error += this_diff * this_diff;
+ error += (unsigned)this_diff * this_diff;
}
error >>= shift;
return error > INT_MAX ? INT_MAX : (int)error;
}
-static int block_error_sby(MACROBLOCK *x, int block_size, int shift) {
+static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
+ const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff,
- block_size, shift);
+ 16 << (bwl + bhl), shift);
}
static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
@@ -630,155 +627,54 @@ static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) {
return sum > INT_MAX ? INT_MAX : (int)sum;
}
-static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize), bw = 1 << bwl;
- const int bh = 1 << b_height_log2(bsize);
- int cost = 0, b;
+static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x,
+ int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
+ const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x;
+ const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y;
+ const int bw = 1 << bwl, bh = 1 << bhl;
ENTROPY_CONTEXT t_above[16], t_left[16];
+ int block, cost;
- vpx_memcpy(&t_above, xd->plane[0].above_context,
+ vpx_memcpy(&t_above, xd->plane[plane].above_context,
sizeof(ENTROPY_CONTEXT) * bw);
- vpx_memcpy(&t_left, xd->plane[0].left_context,
+ vpx_memcpy(&t_left, xd->plane[plane].left_context,
sizeof(ENTROPY_CONTEXT) * bh);
- for (b = 0; b < bw * bh; b++) {
- const int x_idx = b & (bw - 1), y_idx = b >> bwl;
- cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC,
- t_above + x_idx, t_left + y_idx,
- TX_4X4, bw * bh);
- }
-
- return cost;
-}
-
-static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
- int *rate, int *distortion, int *skippable,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
- MACROBLOCKD *const xd = &x->e_mbd;
-
- xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- vp9_transform_sby_4x4(x, bsize);
- vp9_quantize_sby_4x4(x, bsize);
-
- *distortion = block_error_sby(x, 16 << (bwl + bhl), 2);
- *rate = rdcost_sby_4x4(cm, x, bsize);
- *skippable = vp9_sby_is_skippable(xd, bsize);
-}
-
-static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 1);
- int cost = 0, b;
- MACROBLOCKD *const xd = &x->e_mbd;
- ENTROPY_CONTEXT t_above[16], t_left[16];
-
- vpx_memcpy(&t_above, xd->plane[0].above_context,
- sizeof(ENTROPY_CONTEXT) * 2 * bw);
- vpx_memcpy(&t_left, xd->plane[0].left_context,
- sizeof(ENTROPY_CONTEXT) * 2 * bh);
-
- for (b = 0; b < bw * bh; b++) {
- const int x_idx = b & (bw - 1), y_idx = b >> bwl;
- cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_Y_WITH_DC,
- t_above + x_idx * 2, t_left + y_idx * 2,
- TX_8X8, 4 * bw * bh);
- }
-
- return cost;
-}
-
-static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
- int *rate, int *distortion, int *skippable,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
- MACROBLOCKD *const xd = &x->e_mbd;
-
- xd->mode_info_context->mbmi.txfm_size = TX_8X8;
- vp9_transform_sby_8x8(x, bsize);
- vp9_quantize_sby_8x8(x, bsize);
+ cost = 0;
+ for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) {
+ int x_idx, y_idx;
- *distortion = block_error_sby(x, 16 << (bhl + bwl), 2);
- *rate = rdcost_sby_8x8(cm, x, bsize);
- *skippable = vp9_sby_is_skippable(xd, bsize);
-}
-
-static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 2);
- int cost = 0, b;
- MACROBLOCKD *const xd = &x->e_mbd;
- ENTROPY_CONTEXT t_above[16], t_left[16];
+ txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2,
+ &x_idx, &y_idx);
- vpx_memcpy(&t_above, xd->plane[0].above_context,
- sizeof(ENTROPY_CONTEXT) * 4 * bw);
- vpx_memcpy(&t_left, xd->plane[0].left_context,
- sizeof(ENTROPY_CONTEXT) * 4 * bh);
-
- for (b = 0; b < bw * bh; b++) {
- const int x_idx = b & (bw - 1), y_idx = b >> bwl;
- cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_Y_WITH_DC,
- t_above + x_idx * 4, t_left + y_idx * 4,
- TX_16X16, bw * bh * 16);
+ cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type,
+ t_above + x_idx, t_left + y_idx,
+ tx_size, bw * bh);
}
return cost;
}
-static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- int *rate, int *distortion, int *skippable,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
- MACROBLOCKD *const xd = &x->e_mbd;
+static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x,
+ BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
+ int cost = 0, plane;
- xd->mode_info_context->mbmi.txfm_size = TX_16X16;
- vp9_transform_sby_16x16(x, bsize);
- vp9_quantize_sby_16x16(x, bsize);
-
- *distortion = block_error_sby(x, 16 << (bwl + bhl), 2);
- *rate = rdcost_sby_16x16(cm, x, bsize);
- *skippable = vp9_sby_is_skippable(xd, bsize);
-}
-
-static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 3);
- int cost = 0, b;
- MACROBLOCKD * const xd = &x->e_mbd;
- ENTROPY_CONTEXT t_above[16], t_left[16];
-
- vpx_memcpy(&t_above, xd->plane[0].above_context,
- sizeof(ENTROPY_CONTEXT) * 8 * bw);
- vpx_memcpy(&t_left, xd->plane[0].left_context,
- sizeof(ENTROPY_CONTEXT) * 8 * bh);
-
- for (b = 0; b < bw * bh; b++) {
- const int x_idx = b & (bw - 1), y_idx = b >> bwl;
- cost += cost_coeffs(cm, x, b * 64, PLANE_TYPE_Y_WITH_DC,
- t_above + x_idx * 8, t_left + y_idx * 8,
- TX_32X32, bw * bh * 64);
+ for (plane = 1; plane < MAX_MB_PLANE; plane++) {
+ cost += rdcost_plane(cm, x, plane, bsize, tx_size);
}
-
return cost;
}
-static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
- int *rate, int *distortion, int *skippable,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize);
+static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion, int *skippable,
+ BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
+ xd->mode_info_context->mbmi.txfm_size = tx_size;
+ vp9_xform_quant_sby(cm, x, bsize);
- xd->mode_info_context->mbmi.txfm_size = TX_32X32;
- vp9_transform_sby_32x32(x, bsize);
- vp9_quantize_sby_32x32(x, bsize);
-
- *distortion = block_error_sby(x, 16 << (bwl + bhl), 0);
- *rate = rdcost_sby_32x32(cm, x, bsize);
+ *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2);
+ *rate = rdcost_plane(cm, x, 0, bsize, tx_size);
*skippable = vp9_sby_is_skippable(xd, bsize);
}
@@ -792,14 +688,19 @@ static void super_block_yrd(VP9_COMP *cpi,
vp9_subtract_sby(x, bs);
if (bs >= BLOCK_SIZE_SB32X32)
- super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
- bs);
- super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], bs);
- super_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs);
- super_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs);
+ super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32],
+ bs, TX_32X32);
+ if (bs >= BLOCK_SIZE_MB16X16)
+ super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16],
+ bs, TX_16X16);
+ super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs,
+ TX_8X8);
+ super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs,
+ TX_4X4);
choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache,
- TX_32X32 - (bs < BLOCK_SIZE_SB32X32));
+ TX_32X32 - (bs < BLOCK_SIZE_SB32X32)
+ - (bs < BLOCK_SIZE_MB16X16));
}
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
@@ -816,17 +717,25 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
VP9_COMMON *const cm = &cpi->common;
const int src_stride = x->plane[0].src.stride;
uint8_t* const src =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
+ raster_block_offset_uint8(xd,
+ BLOCK_SIZE_SB8X8,
+ 0, ib,
x->plane[0].src.buf, src_stride);
int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
+ raster_block_offset_int16(xd,
+ BLOCK_SIZE_SB8X8,
+ 0, ib,
x->plane[0].src_diff);
int16_t* const diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
+ raster_block_offset_int16(xd,
+ BLOCK_SIZE_SB8X8,
+ 0, ib,
xd->plane[0].diff);
int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16);
uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
+ raster_block_offset_uint8(xd,
+ BLOCK_SIZE_SB8X8,
+ 0, ib,
xd->plane[0].dst.buf, xd->plane[0].dst.stride);
ENTROPY_CONTEXT ta = *a, tempa = *a;
ENTROPY_CONTEXT tl = *l, templ = *l;
@@ -839,7 +748,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
* */
DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);
- assert(ib < 16);
+ assert(ib < 4);
#if CONFIG_NEWBINTRAMODES
xd->mode_info_context->bmi[ib].as_mode.context =
vp9_find_bpred_context(xd, ib, dst, xd->plane[0].dst.stride);
@@ -867,25 +776,27 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
rate = bmode_costs[mode];
#endif
- vp9_intra4x4_predict(xd, ib, mode, dst, xd->plane[0].dst.stride);
- vp9_subtract_block(4, 4, src_diff, 16,
+ vp9_intra4x4_predict(xd, ib,
+ BLOCK_SIZE_SB8X8,
+ mode, dst, xd->plane[0].dst.stride);
+ vp9_subtract_block(4, 4, src_diff, 8,
src, src_stride,
dst, xd->plane[0].dst.stride);
xd->mode_info_context->bmi[ib].as_mode.first = mode;
tx_type = get_tx_type_4x4(xd, ib);
if (tx_type != DCT_DCT) {
- vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
+ vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
x->quantize_b_4x4(x, ib, tx_type, 16);
} else {
- x->fwd_txm4x4(src_diff, coeff, 32);
+ x->fwd_txm4x4(src_diff, coeff, 16);
x->quantize_b_4x4(x, ib, tx_type, 16);
}
tempa = ta;
templ = tl;
- ratey = cost_coeffs(cm, x, ib,
+ ratey = cost_coeffs(cm, x, 0, ib,
PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16);
rate += ratey;
distortion = vp9_block_error(coeff,
@@ -911,13 +822,15 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
// inverse transform
if (best_tx_type != DCT_DCT)
- vp9_short_iht4x4(best_dqcoeff, diff, 16, best_tx_type);
+ vp9_short_iht4x4(best_dqcoeff, diff, 8, best_tx_type);
else
- xd->inv_txm4x4(best_dqcoeff, diff, 32);
+ xd->inv_txm4x4(best_dqcoeff, diff, 16);
- vp9_intra4x4_predict(xd, ib, *best_mode,
+ vp9_intra4x4_predict(xd, ib,
+ BLOCK_SIZE_SB8X8,
+ *best_mode,
dst, xd->plane[0].dst.stride);
- vp9_recon_b(dst, diff,
+ vp9_recon_b(dst, diff, 8,
dst, xd->plane[0].dst.stride);
return best_rd;
@@ -932,7 +845,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
int distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
- ENTROPY_CONTEXT t_above[4], t_left[4];
+ ENTROPY_CONTEXT t_above[2], t_left[2];
int *bmode_costs;
vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
@@ -941,15 +854,17 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
xd->mode_info_context->mbmi.mode = I4X4_PRED;
bmode_costs = mb->inter_bmode_costs;
- for (i = 0; i < 16; i++) {
- const int x_idx = i & 3, y_idx = i >> 2;
+ for (i = 0; i < 4; i++) {
+ const int x_idx = i & 1, y_idx = i >> 1;
MODE_INFO *const mic = xd->mode_info_context;
const int mis = xd->mode_info_stride;
B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
#if CONFIG_NEWBINTRAMODES
uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i,
+ raster_block_offset_uint8(xd,
+ BLOCK_SIZE_SB8X8,
+ 0, i,
xd->plane[0].dst.buf,
xd->plane[0].dst.stride);
#endif
@@ -1046,403 +961,16 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
return best_rd;
}
-static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
- B_PREDICTION_MODE *best_mode,
- int *mode_costs,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
- int *bestrate, int *bestratey,
- int *bestdistortion) {
- VP9_COMMON *const cm = &cpi->common;
- MB_PREDICTION_MODE mode;
- MACROBLOCKD *xd = &x->e_mbd;
- int64_t best_rd = INT64_MAX;
- int distortion = 0, rate = 0;
- ENTROPY_CONTEXT ta[2], tl[2], ta_temp[2], tl_temp[2];
- // perform transformation of dimension 8x8
- // note the input and output index mapping
- int idx = (ib & 0x02) ? (ib + 2) : ib;
- const int src_stride = x->plane[0].src.stride;
- uint8_t* const src =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
- x->plane[0].src.buf, src_stride);
- int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
- x->plane[0].src_diff);
- int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16);
- uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
- xd->plane[0].dst.buf, xd->plane[0].dst.stride);
-
- assert(ib < 16);
- vpx_memcpy(ta, a, sizeof(ta));
- vpx_memcpy(tl, l, sizeof(tl));
- for (mode = DC_PRED; mode <= TM_PRED; mode++) {
- int64_t this_rd;
- int rate_t = 0;
-
- // FIXME rate for compound mode and second intrapred mode
- rate = mode_costs[mode];
- xd->mode_info_context->bmi[ib].as_mode.first = mode;
-
- vp9_intra8x8_predict(xd, ib, mode, dst, xd->plane[0].dst.stride);
-
- vp9_subtract_block(8, 8, src_diff, 16,
- src, src_stride,
- dst, xd->plane[0].dst.stride);
-
- vpx_memcpy(ta_temp, ta, sizeof(ta));
- vpx_memcpy(tl_temp, tl, sizeof(tl));
-
- if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
- TX_TYPE tx_type = get_tx_type_8x8(xd, ib);
- if (tx_type != DCT_DCT)
- vp9_short_fht8x8(src_diff, coeff, 16, tx_type);
- else
- x->fwd_txm8x8(src_diff, coeff, 32);
- x->quantize_b_8x8(x, idx, tx_type, 16);
-
- // compute quantization mse of 8x8 block
- distortion = vp9_block_error_c(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
-
- rate_t = cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
- ta_temp, tl_temp, TX_8X8, 16);
-
- rate += rate_t;
- } else {
- static const int iblock[4] = {0, 1, 4, 5};
- TX_TYPE tx_type;
- int i;
-
- distortion = 0;
- rate_t = 0;
- for (i = 0; i < 4; ++i) {
- int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16,
- 0, ib + iblock[i],
- x->plane[0].src_diff);
- int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff,
- ib + iblock[i], 16);
- int do_two = 0;
- tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
- if (tx_type != DCT_DCT) {
- vp9_short_fht4x4(src_diff, coeff, 16, tx_type);
- x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
- } else if (!(i & 1) &&
- get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
- x->fwd_txm8x4(src_diff, coeff, 32);
- x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16);
- do_two = 1;
- } else {
- x->fwd_txm4x4(src_diff, coeff, 32);
- x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16);
- }
- distortion += vp9_block_error_c(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16),
- 16 << do_two);
- rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
- &ta_temp[i & 1], &tl_temp[i >> 1],
- TX_4X4, 16);
- if (do_two) {
- i++;
- rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
- &ta_temp[i & 1], &tl_temp[i >> 1],
- TX_4X4, 16);
- }
- }
- rate += rate_t;
- }
-
- distortion >>= 2;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
- if (this_rd < best_rd) {
- *bestrate = rate;
- *bestratey = rate_t;
- *bestdistortion = distortion;
- vpx_memcpy(a, ta_temp, sizeof(ta_temp));
- vpx_memcpy(l, tl_temp, sizeof(tl_temp));
- best_rd = this_rd;
- *best_mode = mode;
- }
- }
- xd->mode_info_context->bmi[ib].as_mode.first = (*best_mode);
- vp9_encode_intra8x8(x, ib);
-
- return best_rd;
-}
-
-static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
- int *Rate, int *rate_y,
- int *Distortion, int64_t best_rd) {
- MACROBLOCKD *const xd = &mb->e_mbd;
- int i, ib;
- int cost = mb->mbmode_cost [xd->frame_type] [I8X8_PRED];
- int distortion = 0;
- int tot_rate_y = 0;
- int64_t total_rd = 0;
- ENTROPY_CONTEXT t_above[4], t_left[4];
- int *i8x8mode_costs;
-
- vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
- vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
-
- xd->mode_info_context->mbmi.mode = I8X8_PRED;
- i8x8mode_costs = mb->i8x8_mode_costs;
-
- for (i = 0; i < 4; i++) {
- const int x_idx = i & 1, y_idx = i >> 1;
- MODE_INFO *const mic = xd->mode_info_context;
- B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode);
- int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d);
-
- ib = vp9_i8x8_block[i];
- total_rd += rd_pick_intra8x8block(cpi, mb, ib, &best_mode, i8x8mode_costs,
- t_above + x_idx * 2, t_left + y_idx * 2,
- &r, &ry, &d);
- cost += r;
- distortion += d;
- tot_rate_y += ry;
- mic->bmi[ib].as_mode.first = best_mode;
- }
-
- *Rate = cost;
- *rate_y = tot_rate_y;
- *Distortion = distortion;
- return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
-}
-
-static int64_t rd_pick_intra8x8mby_modes_and_txsz(VP9_COMP *cpi, MACROBLOCK *x,
- int *rate, int *rate_y,
- int *distortion,
- int *mode8x8,
- int64_t best_yrd,
- int64_t *txfm_cache) {
- VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
- int cost0 = vp9_cost_bit(cm->prob_tx[0], 0);
- int cost1 = vp9_cost_bit(cm->prob_tx[0], 1);
- int64_t tmp_rd_4x4s, tmp_rd_8x8s;
- int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd;
- int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8;
-
- mbmi->txfm_size = TX_4X4;
- tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4,
- &d4x4, best_yrd);
- mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
- mbmi->txfm_size = TX_8X8;
- tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8,
- &d8x8, best_yrd);
- txfm_cache[ONLY_4X4] = tmp_rd_4x4;
- txfm_cache[ALLOW_8X8] = tmp_rd_8x8;
- txfm_cache[ALLOW_16X16] = tmp_rd_8x8;
- tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0);
- tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0);
- txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ?
- tmp_rd_4x4s : tmp_rd_8x8s;
- if (cm->txfm_mode == TX_MODE_SELECT) {
- if (tmp_rd_4x4s < tmp_rd_8x8s) {
- *rate = r4x4 + cost0;
- *rate_y = tok4x4 + cost0;
- *distortion = d4x4;
- mbmi->txfm_size = TX_4X4;
- tmp_rd = tmp_rd_4x4s;
- } else {
- *rate = r8x8 + cost1;
- *rate_y = tok8x8 + cost1;
- *distortion = d8x8;
- mbmi->txfm_size = TX_8X8;
- tmp_rd = tmp_rd_8x8s;
-
- mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
- }
- } else if (cm->txfm_mode == ONLY_4X4) {
- *rate = r4x4;
- *rate_y = tok4x4;
- *distortion = d4x4;
- mbmi->txfm_size = TX_4X4;
- tmp_rd = tmp_rd_4x4;
- } else {
- *rate = r8x8;
- *rate_y = tok8x8;
- *distortion = d8x8;
- mbmi->txfm_size = TX_8X8;
- tmp_rd = tmp_rd_8x8;
-
- mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first;
- mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first;
- mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first;
- mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first;
- }
-
- return tmp_rd;
-}
-
-static int rd_cost_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 1);
- int yoff = 4 * bw * bh;
- int p, b, cost = 0;
- MACROBLOCKD *const xd = &x->e_mbd;
-
- for (p = 1; p < MAX_MB_PLANE; p++) {
- ENTROPY_CONTEXT t_above[8], t_left[8];
-
- vpx_memcpy(t_above, xd->plane[p].above_context,
- sizeof(ENTROPY_CONTEXT) * 2 * bw >> xd->plane[p].subsampling_x);
- vpx_memcpy(t_left, xd->plane[p].left_context,
- sizeof(ENTROPY_CONTEXT) * 2 * bh >> xd->plane[p].subsampling_y);
- for (b = 0; b < bw * bh; b++) {
- const int x_idx = b & (bw - 1), y_idx = b >> bwl;
- cost += cost_coeffs(cm, x, yoff + b, PLANE_TYPE_UV,
- t_above + x_idx, t_left + y_idx,
- TX_4X4, bw * bh * 4);
- }
- yoff = (yoff * 5) >> 2; // u -> v
- }
-
- return cost;
-}
-
-static void super_block_uvrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
- int *rate, int *distortion, int *skip,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
-
- vp9_transform_sbuv_4x4(x, bsize);
- vp9_quantize_sbuv_4x4(x, bsize);
-
- *rate = rd_cost_sbuv_4x4(cm, x, bsize);
- *distortion = block_error_sbuv(x, bsize, 2);
- *skip = vp9_sbuv_is_skippable(xd, bsize);
-}
-
-static int rd_cost_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 2);
- int yoff = 16 * bw * bh;
- int p, b, cost = 0;
- MACROBLOCKD *const xd = &x->e_mbd;
-
- for (p = 1; p < MAX_MB_PLANE; p++) {
- ENTROPY_CONTEXT t_above[8], t_left[8];
-
- vpx_memcpy(t_above, xd->plane[p].above_context,
- sizeof(ENTROPY_CONTEXT) * 4 * bw >> xd->plane[p].subsampling_x);
- vpx_memcpy(t_left, xd->plane[p].left_context,
- sizeof(ENTROPY_CONTEXT) * 4 * bh >> xd->plane[p].subsampling_y);
- for (b = 0; b < bw * bh; b++) {
- const int x_idx = b & (bw - 1), y_idx = b >> bwl;
- cost += cost_coeffs(cm, x, yoff + b * 4, PLANE_TYPE_UV,
- t_above + x_idx * 2, t_left + y_idx * 2,
- TX_8X8, bw * bh * 16);
- }
- yoff = (yoff * 5) >> 2; // u -> v
- }
-
- return cost;
-}
-
-static void super_block_uvrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
- int *rate, int *distortion, int *skip,
- BLOCK_SIZE_TYPE bsize) {
+static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x,
+ int *rate, int *distortion,
+ int *skippable, BLOCK_SIZE_TYPE bsize,
+ TX_SIZE uv_tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
+ vp9_xform_quant_sbuv(cm, x, bsize);
- vp9_transform_sbuv_8x8(x, bsize);
- vp9_quantize_sbuv_8x8(x, bsize);
-
- *rate = rd_cost_sbuv_8x8(cm, x, bsize);
- *distortion = block_error_sbuv(x, bsize, 2);
- *skip = vp9_sbuv_is_skippable(xd, bsize);
-}
-
-static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 3);
- int yoff = 64 * bw * bh;
- int p, b, cost = 0;
- MACROBLOCKD *const xd = &x->e_mbd;
-
- for (p = 1; p < MAX_MB_PLANE; p++) {
- ENTROPY_CONTEXT t_above[8], t_left[8];
-
- vpx_memcpy(t_above, xd->plane[p].above_context,
- sizeof(ENTROPY_CONTEXT) * 8 * bw >> xd->plane[p].subsampling_x);
- vpx_memcpy(t_left, xd->plane[p].left_context,
- sizeof(ENTROPY_CONTEXT) * 8 * bh >> xd->plane[p].subsampling_y);
- for (b = 0; b < bw * bh; b++) {
- const int x_idx = b & (bw - 1), y_idx = b >> bwl;
- cost += cost_coeffs(cm, x, yoff + b * 16, PLANE_TYPE_UV,
- t_above + x_idx * 4, t_left + y_idx * 4,
- TX_16X16, bw * bh * 64);
- }
- yoff = (yoff * 5) >> 2; // u -> v
- }
-
- return cost;
-}
-
-static void super_block_uvrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
- int *rate, int *distortion, int *skip,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
-
- vp9_transform_sbuv_16x16(x, bsize);
- vp9_quantize_sbuv_16x16(x, bsize);
-
- *rate = rd_cost_sbuv_16x16(cm, x, bsize);
- *distortion = block_error_sbuv(x, bsize, 2);
- *skip = vp9_sbuv_is_skippable(xd, bsize);
-}
-
-static int rd_cost_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
- BLOCK_SIZE_TYPE bsize) {
- const int bwl = b_width_log2(bsize) - 4, bw = 1 << bwl;
- const int bh = 1 << (b_height_log2(bsize) - 4);
- int yoff = 256 * bh * bw;
- int p, b, cost = 0;
- MACROBLOCKD *const xd = &x->e_mbd;
-
- for (p = 1; p < MAX_MB_PLANE; p++) {
- ENTROPY_CONTEXT t_above[8], t_left[8];
-
- vpx_memcpy(t_above, xd->plane[p].above_context,
- sizeof(ENTROPY_CONTEXT) * 16 * bw >> xd->plane[p].subsampling_x);
- vpx_memcpy(t_left, xd->plane[p].left_context,
- sizeof(ENTROPY_CONTEXT) * 16 * bh >> xd->plane[p].subsampling_y);
- for (b = 0; b < bw * bh; b++) {
- const int x_idx = b * (bw - 1), y_idx = b >> bwl;
- cost += cost_coeffs(cm, x, yoff + b * 64, PLANE_TYPE_UV,
- t_above + x_idx * 8, t_left + y_idx * 8,
- TX_32X32, 256 * bh * bw);
- }
- yoff = (yoff * 5) >> 2; // u -> v
- }
-
- return cost;
-}
-#undef UVCTX
-
-static void super_block_uvrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
- int *rate, int *distortion, int *skip,
- BLOCK_SIZE_TYPE bsize) {
- MACROBLOCKD *const xd = &x->e_mbd;
-
- vp9_transform_sbuv_32x32(x, bsize);
- vp9_quantize_sbuv_32x32(x, bsize);
-
- *rate = rd_cost_sbuv_32x32(cm, x, bsize);
- *distortion = block_error_sbuv(x, bsize, 0);
- *skip = vp9_sbuv_is_skippable(xd, bsize);
+ *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2);
+ *rate = rdcost_uv(cm, x, bsize, uv_tx_size);
+ *skippable = vp9_sbuv_is_skippable(xd, bsize);
}
static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
@@ -1454,14 +982,17 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x,
vp9_subtract_sbuv(x, bsize);
if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) {
- super_block_uvrd_32x32(cm, x, rate, distortion, skippable, bsize);
+ super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
+ TX_32X32);
} else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) {
- super_block_uvrd_16x16(cm, x, rate, distortion, skippable, bsize);
- } else if (mbmi->txfm_size >= TX_8X8) {
- super_block_uvrd_8x8(cm, x, rate, distortion, skippable, bsize);
+ super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
+ TX_16X16);
+ } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) {
+ super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
+ TX_8X8);
} else {
- assert(mbmi->txfm_size == TX_4X4);
- super_block_uvrd_4x4(cm, x, rate, distortion, skippable, bsize);
+ super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize,
+ TX_4X4);
}
}
@@ -1524,28 +1055,25 @@ void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int;
}
-static int labels2mode(
- MACROBLOCK *x,
- int const *labelings, int which_label,
- B_PREDICTION_MODE this_mode,
- int_mv *this_mv, int_mv *this_second_mv,
- int_mv seg_mvs[MAX_REF_FRAMES - 1],
- int_mv *best_ref_mv,
- int_mv *second_best_ref_mv,
- int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
+static int labels2mode(MACROBLOCK *x,
+ int const *labelings, int which_label,
+ B_PREDICTION_MODE this_mode,
+ int_mv *this_mv, int_mv *this_second_mv,
+ int_mv seg_mvs[MAX_REF_FRAMES - 1],
+ int_mv *best_ref_mv,
+ int_mv *second_best_ref_mv,
+ int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mic = xd->mode_info_context;
MB_MODE_INFO * mbmi = &mic->mbmi;
const int mis = xd->mode_info_stride;
-
int i, cost = 0, thismvcost = 0;
/* We have to be careful retrieving previously-encoded motion vectors.
- Ones from this macroblock have to be pulled from the BLOCKD array
- as they have not yet made it to the bmi array in our MB_MODE_INFO. */
- for (i = 0; i < 16; ++i) {
- const int row = i >> 2, col = i & 3;
-
+ Ones from this macroblock have to be pulled from the BLOCKD array
+ as they have not yet made it to the bmi array in our MB_MODE_INFO. */
+ for (i = 0; i < 4; ++i) {
+ const int row = i >> 1, col = i & 1;
B_PREDICTION_MODE m;
if (labelings[i] != which_label)
@@ -1553,7 +1081,7 @@ static int labels2mode(
if (col && labelings[i] == labelings[i - 1])
m = LEFT4X4;
- else if (row && labelings[i] == labelings[i - 4])
+ else if (row && labelings[i] == labelings[i - 2])
m = ABOVE4X4;
else {
// the only time we should do costing for new motion vector or mode
@@ -1563,7 +1091,7 @@ static int labels2mode(
if (mbmi->second_ref_frame > 0) {
this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int;
this_second_mv->as_int =
- seg_mvs[mbmi->second_ref_frame - 1].as_int;
+ seg_mvs[mbmi->second_ref_frame - 1].as_int;
}
thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost,
@@ -1576,17 +1104,17 @@ static int labels2mode(
break;
case LEFT4X4:
this_mv->as_int = col ? mic->bmi[i - 1].as_mv[0].as_int :
- left_block_mv(xd, mic, i);
+ left_block_mv(xd, mic, i);
if (mbmi->second_ref_frame > 0)
this_second_mv->as_int = col ? mic->bmi[i - 1].as_mv[1].as_int :
- left_block_second_mv(xd, mic, i);
+ left_block_second_mv(xd, mic, i);
break;
case ABOVE4X4:
- this_mv->as_int = row ? mic->bmi[i - 4].as_mv[0].as_int :
- above_block_mv(mic, i, mis);
+ this_mv->as_int = row ? mic->bmi[i - 2].as_mv[0].as_int :
+ above_block_mv(mic, i, mis);
if (mbmi->second_ref_frame > 0)
- this_second_mv->as_int = row ? mic->bmi[i - 4].as_mv[1].as_int :
- above_block_second_mv(mic, i, mis);
+ this_second_mv->as_int = row ? mic->bmi[i - 2].as_mv[1].as_int :
+ above_block_second_mv(mic, i, mis);
break;
case ZERO4X4:
this_mv->as_int = 0;
@@ -1597,15 +1125,15 @@ static int labels2mode(
break;
}
- if (m == ABOVE4X4) { // replace above with left if same
+ if (m == ABOVE4X4) { // replace above with left if same
int_mv left_mv, left_second_mv;
left_second_mv.as_int = 0;
left_mv.as_int = col ? mic->bmi[i - 1].as_mv[0].as_int :
- left_block_mv(xd, mic, i);
+ left_block_mv(xd, mic, i);
if (mbmi->second_ref_frame > 0)
left_second_mv.as_int = col ? mic->bmi[i - 1].as_mv[1].as_int :
- left_block_second_mv(xd, mic, i);
+ left_block_second_mv(xd, mic, i);
if (left_mv.as_int == this_mv->as_int &&
(mbmi->second_ref_frame <= 0 ||
@@ -1614,8 +1142,8 @@ static int labels2mode(
}
#if CONFIG_NEWBINTRAMODES
- cost = x->inter_bmode_costs[
- m == B_CONTEXT_PRED ? m - CONTEXT_PRED_REPLACEMENTS : m];
+ cost = x->inter_bmode_costs[m == B_CONTEXT_PRED ?
+ m - CONTEXT_PRED_REPLACEMENTS : m];
#else
cost = x->inter_bmode_costs[m];
#endif
@@ -1648,24 +1176,24 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
*labelyrate = 0;
*distortion = 0;
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < 4; i++) {
if (labels[i] == which_label) {
const int src_stride = x->plane[0].src.stride;
uint8_t* const src =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i,
- x->plane[0].src.buf, src_stride);
+ raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ x->plane[0].src.buf, src_stride);
int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, i,
- x->plane[0].src_diff);
+ raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i,
+ x->plane[0].src_diff);
int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i);
uint8_t* const pre =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i,
- xd->plane[0].pre[0].buf,
- xd->plane[0].pre[0].stride);
+ raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride);
+ raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ xd->plane[0].dst.buf,
+ xd->plane[0].dst.stride);
int thisdistortion;
vp9_build_inter_predictor(pre,
@@ -1681,210 +1209,48 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
// weighting for splitmv modes is turned on.
if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
uint8_t* const second_pre =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i,
- xd->plane[0].pre[1].buf,
- xd->plane[0].pre[1].stride);
- vp9_build_inter_predictor(
- second_pre, xd->plane[0].pre[1].stride,
- dst, xd->plane[0].dst.stride,
- &xd->mode_info_context->bmi[i].as_mv[1],
- &xd->scale_factor[1], 4, 4, 1,
- &xd->subpix);
+ raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i,
+ xd->plane[0].pre[1].buf,
+ xd->plane[0].pre[1].stride);
+ vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride,
+ dst, xd->plane[0].dst.stride,
+ &xd->mode_info_context->bmi[i].as_mv[1],
+ &xd->scale_factor[1], 4, 4, 1,
+ &xd->subpix);
}
- vp9_subtract_block(4, 4, src_diff, 16,
+ vp9_subtract_block(4, 4, src_diff, 8,
src, src_stride,
dst, xd->plane[0].dst.stride);
- x->fwd_txm4x4(src_diff, coeff, 32);
+ x->fwd_txm4x4(src_diff, coeff, 16);
x->quantize_b_4x4(x, i, DCT_DCT, 16);
thisdistortion = vp9_block_error(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16);
+ BLOCK_OFFSET(xd->plane[0].dqcoeff,
+ i, 16), 16);
*distortion += thisdistortion;
- *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC,
- ta + (i & 3),
- tl + (i >> 2), TX_4X4, 16);
+ *labelyrate += cost_coeffs(cm, x, 0, i, PLANE_TYPE_Y_WITH_DC,
+ ta + (i & 1),
+ tl + (i >> 1), TX_4X4, 16);
}
}
*distortion >>= 2;
return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
-static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
- MACROBLOCK *x,
- int const *labels,
- int which_label,
- int *labelyrate,
- int *distortion,
- int64_t *otherrd,
- ENTROPY_CONTEXT *ta,
- ENTROPY_CONTEXT *tl) {
- int i, j;
- MACROBLOCKD *xd = &x->e_mbd;
- const int iblock[4] = { 0, 1, 4, 5 };
- int othercost = 0, otherdist = 0;
- ENTROPY_CONTEXT tac[4], tlc[4];
-
- if (otherrd) {
- memcpy(&tac, ta, sizeof(tac));
- memcpy(&tlc, tl, sizeof(tlc));
- }
-
- *distortion = 0;
- *labelyrate = 0;
- for (i = 0; i < 4; i++) {
- int ib = vp9_i8x8_block[i];
-
- if (labels[ib] == which_label) {
- const int use_second_ref =
- xd->mode_info_context->mbmi.second_ref_frame > 0;
- int which_mv;
- const int idx = (ib & 8) + ((ib & 2) << 1);
- const int src_stride = x->plane[0].src.stride;
- uint8_t* const src =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
- x->plane[0].src.buf, src_stride);
- int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib,
- x->plane[0].src_diff);
- int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16);
- int thisdistortion;
- uint8_t* const dst =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride);
-
- assert(idx < 16);
- for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
- uint8_t* const pre =
- raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib,
- xd->plane[0].pre[which_mv].buf,
- xd->plane[0].pre[which_mv].stride);
-
- // TODO(debargha): Make this work properly with the
- // implicit-compoundinter-weight experiment when implicit
- // weighting for splitmv modes is turned on.
- vp9_build_inter_predictor(
- pre, xd->plane[0].pre[which_mv].stride,
- dst, xd->plane[0].dst.stride,
- &xd->mode_info_context->bmi[ib].as_mv[which_mv],
- &xd->scale_factor[which_mv], 8, 8,
- which_mv, &xd->subpix);
- }
-
- vp9_subtract_block(8, 8, src_diff, 16,
- src, src_stride,
- dst, xd->plane[0].dst.stride);
-
- if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
- if (otherrd) {
- x->fwd_txm8x8(src_diff, coeff, 32);
- x->quantize_b_8x8(x, idx, DCT_DCT, 16);
- thisdistortion = vp9_block_error_c(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
- otherdist += thisdistortion;
- xd->mode_info_context->mbmi.txfm_size = TX_8X8;
- othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
- tac + (i & 1) * 2,
- tlc + (i & 2),
- TX_8X8, 16);
- xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- }
- for (j = 0; j < 4; j += 2) {
- int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16,
- 0, ib + iblock[j],
- x->plane[0].src_diff);
- int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff,
- ib + iblock[j], 16);
- x->fwd_txm8x4(src_diff, coeff, 32);
- x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16);
- thisdistortion = vp9_block_error_c(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32);
- *distortion += thisdistortion;
- *labelyrate +=
- cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
- ta + (i & 1) * 2,
- tl + (i & 2) + ((j & 2) >> 1),
- TX_4X4, 16);
- *labelyrate +=
- cost_coeffs(cm, x, ib + iblock[j] + 1,
- PLANE_TYPE_Y_WITH_DC,
- ta + (i & 1) * 2 + 1,
- tl + (i & 2) + ((j & 2) >> 1),
- TX_4X4, 16);
- }
- } else /* 8x8 */ {
- if (otherrd) {
- for (j = 0; j < 4; j += 2) {
- int16_t* const src_diff =
- raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16,
- 0, ib + iblock[j],
- x->plane[0].src_diff);
- int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff,
- ib + iblock[j], 16);
- x->fwd_txm8x4(src_diff, coeff, 32);
- x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16);
- thisdistortion = vp9_block_error_c(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32);
- otherdist += thisdistortion;
- xd->mode_info_context->mbmi.txfm_size = TX_4X4;
- othercost +=
- cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
- tac + (i & 1) * 2,
- tlc + (i & 2) + ((j & 2) >> 1),
- TX_4X4, 16);
- othercost +=
- cost_coeffs(cm, x, ib + iblock[j] + 1,
- PLANE_TYPE_Y_WITH_DC,
- tac + (i & 1) * 2 + 1,
- tlc + (i & 2) + ((j & 2) >> 1),
- TX_4X4, 16);
- xd->mode_info_context->mbmi.txfm_size = TX_8X8;
- }
- }
- x->fwd_txm8x8(src_diff, coeff, 32);
- x->quantize_b_8x8(x, idx, DCT_DCT, 16);
- thisdistortion = vp9_block_error_c(coeff,
- BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
- *distortion += thisdistortion;
- *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
- ta + (i & 1) * 2,
- tl + (i & 2),
- TX_8X8, 16);
- }
- }
- }
- *distortion >>= 2;
- if (otherrd) {
- otherdist >>= 2;
- *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist);
- }
- return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
-}
-
-static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0};
-
-
typedef struct {
int_mv *ref_mv, *second_ref_mv;
int_mv mvp;
int64_t segment_rd;
- SPLITMV_PARTITIONING_TYPE segment_num;
- TX_SIZE txfm_size;
int r;
int d;
int segment_yrate;
- B_PREDICTION_MODE modes[16];
- int_mv mvs[16], second_mvs[16];
- int eobs[16];
+ B_PREDICTION_MODE modes[4];
+ int_mv mvs[4], second_mvs[4];
+ int eobs[4];
int mvthresh;
int *mdcounts;
-
- int_mv sv_mvp[4]; // save 4 mvp from 8x8
- int sv_istep[2]; // save 2 initial step_param for 16x8/8x16
-
} BEST_SEG_INFO;
static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
@@ -1898,37 +1264,29 @@ static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi,
- SPLITMV_PARTITIONING_TYPE segmentation,
- TX_SIZE tx_size, int64_t *otherrds,
- int64_t *rds, int *completed,
- /* 16 = n_blocks */
- int_mv seg_mvs[16 /* n_blocks */]
- [MAX_REF_FRAMES - 1]) {
+ int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
int i, j;
- int const *labels;
+ static const int labels[4] = { 0, 1, 2, 3 };
int br = 0, bd = 0;
B_PREDICTION_MODE this_mode;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
-
- int label_count;
+ const int label_count = 4;
int64_t this_segment_rd = 0, other_segment_rd;
int label_mv_thresh;
int rate = 0;
int sbr = 0, sbd = 0;
int segmentyrate = 0;
- int best_eobs[16] = { 0 };
+ int best_eobs[4] = { 0 };
vp9_variance_fn_ptr_t *v_fn_ptr;
- ENTROPY_CONTEXT t_above[4], t_left[4];
- ENTROPY_CONTEXT t_above_b[4], t_left_b[4];
+ ENTROPY_CONTEXT t_above[2], t_left[2];
+ ENTROPY_CONTEXT t_above_b[2], t_left_b[2];
vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above));
vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left));
- v_fn_ptr = &cpi->fn_ptr[segmentation];
- labels = vp9_mbsplits[segmentation];
- label_count = vp9_mbsplit_count[segmentation];
+ v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4];
// 64 makes this threshold really big effectively
// making it so that we very rarely check mvs on
@@ -1937,15 +1295,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
label_mv_thresh = 1 * bsi->mvthresh / label_count;
// Segmentation method overheads
- rate = cost_token(vp9_mbsplit_tree, vp9_mbsplit_probs,
- vp9_mbsplit_encodings + segmentation);
rate += vp9_cost_mv_ref(cpi, SPLITMV,
mbmi->mb_mode_context[mbmi->ref_frame]);
this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
br += rate;
other_segment_rd = this_segment_rd;
- mbmi->txfm_size = tx_size;
for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) {
int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT];
int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX;
@@ -1954,10 +1309,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
// search for the best motion vector on this segment
for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) {
- int64_t this_rd, other_rd;
+ int64_t this_rd;
int distortion;
int labelyrate;
- ENTROPY_CONTEXT t_above_s[4], t_left_s[4];
+ ENTROPY_CONTEXT t_above_s[2], t_left_s[2];
vpx_memcpy(t_above_s, t_above, sizeof(t_above_s));
vpx_memcpy(t_left_s, t_left, sizeof(t_left_s));
@@ -1977,22 +1332,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
break;
if (cpi->compressor_speed) {
- if (segmentation == PARTITIONING_8X16 ||
- segmentation == PARTITIONING_16X8) {
- bsi->mvp.as_int = bsi->sv_mvp[i].as_int;
- if (i == 1 && segmentation == PARTITIONING_16X8)
- bsi->mvp.as_int = bsi->sv_mvp[2].as_int;
-
- step_param = bsi->sv_istep[i];
- }
-
// use previous block's result as next block's MV predictor.
- if (segmentation == PARTITIONING_4X4 && i > 0) {
+ if (i > 0) {
bsi->mvp.as_int =
- x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
- if (i == 4 || i == 8 || i == 12)
+ x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int;
+ if (i == 2)
bsi->mvp.as_int =
- x->e_mbd.mode_info_context->bmi[i - 4].as_mv[0].as_int;
+ x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int;
step_param = 2;
}
}
@@ -2007,24 +1353,24 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
// find first label
- n = vp9_mbsplit_offset[segmentation][i];
+ n = i;
// adjust src pointer for this segment
x->plane[0].src.buf =
- raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_MB16X16, 0, n,
- x->plane[0].src.buf,
- x->plane[0].src.stride);
- assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0xf) == 0);
+ raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
+ x->plane[0].src.buf,
+ x->plane[0].src.stride);
+ assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0);
x->e_mbd.plane[0].pre[0].buf =
- raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_MB16X16, 0, n,
- x->e_mbd.plane[0].pre[0].buf,
- x->e_mbd.plane[0].pre[0].stride);
+ raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n,
+ x->e_mbd.plane[0].pre[0].buf,
+ x->e_mbd.plane[0].pre[0].stride);
bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
sadpb, further_steps, 0, v_fn_ptr,
bsi->ref_mv, &mode_mv[NEW4X4]);
- sseshift = segmentation_to_sseshift[segmentation];
+ sseshift = 0;
// Should we do a full search (best quality only)
if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) {
@@ -2041,12 +1387,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (thissme < bestsme) {
bestsme = thissme;
mode_mv[NEW4X4].as_int =
- x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int;
+ x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int;
} else {
/* The full search result is actually worse so re-instate the
* previous best vector */
x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int =
- mode_mv[NEW4X4].as_int;
+ mode_mv[NEW4X4].as_int;
}
}
}
@@ -2092,17 +1438,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
mv_check_bounds(x, &second_mode_mv[this_mode]))
continue;
- if (segmentation == PARTITIONING_4X4) {
- this_rd = encode_inter_mb_segment(&cpi->common,
- x, labels, i, &labelyrate,
- &distortion, t_above_s, t_left_s);
- other_rd = this_rd;
- } else {
- this_rd = encode_inter_mb_segment_8x8(&cpi->common,
- x, labels, i, &labelyrate,
- &distortion, &other_rd,
- t_above_s, t_left_s);
- }
+ this_rd = encode_inter_mb_segment(&cpi->common,
+ x, labels, i, &labelyrate,
+ &distortion, t_above_s, t_left_s);
this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0);
rate += labelyrate;
@@ -2112,24 +1450,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
bestlabelyrate = labelyrate;
mode_selected = this_mode;
best_label_rd = this_rd;
- if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) {
- for (j = 0; j < 16; j++)
- if (labels[j] == i)
- best_eobs[j] = x->e_mbd.plane[0].eobs[j];
- } else {
- for (j = 0; j < 4; j++) {
- int ib = vp9_i8x8_block[j], idx = j * 4;
-
- if (labels[ib] == i)
- best_eobs[idx] = x->e_mbd.plane[0].eobs[idx];
- }
- }
- if (other_rd < best_other_rd)
- best_other_rd = other_rd;
+ for (j = 0; j < 4; j++)
+ if (labels[j] == i)
+ best_eobs[j] = x->e_mbd.plane[0].eobs[j];
vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s));
vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s));
-
}
} /*for each 4x4 mode*/
@@ -2146,10 +1472,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
segmentyrate += bestlabelyrate;
this_segment_rd += best_label_rd;
other_segment_rd += best_other_rd;
- if (rds)
- rds[i] = this_segment_rd;
- if (otherrds)
- otherrds[i] = other_segment_rd;
} /* for each label */
if (this_segment_rd < bsi->segment_rd) {
@@ -2157,11 +1479,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
bsi->d = bd;
bsi->segment_yrate = segmentyrate;
bsi->segment_rd = this_segment_rd;
- bsi->segment_num = segmentation;
- bsi->txfm_size = mbmi->txfm_size;
// store everything needed to come back to this!!
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < 4; i++) {
bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv;
if (mbmi->second_ref_frame > 0)
bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv;
@@ -2169,118 +1489,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
bsi->eobs[i] = best_eobs[i];
}
}
-
- if (completed) {
- *completed = i;
- }
}
static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x,
BEST_SEG_INFO *bsi,
- unsigned int segmentation,
- /* 16 = n_blocks */
- int_mv seg_mvs[16][MAX_REF_FRAMES - 1],
- int64_t txfm_cache[NB_TXFM_MODES]) {
- int i, n, c = vp9_mbsplit_count[segmentation];
-
- if (segmentation == PARTITIONING_4X4) {
- int64_t rd[16];
-
- rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, NULL,
- rd, &n, seg_mvs);
- if (n == c) {
- for (i = 0; i < NB_TXFM_MODES; i++) {
- if (rd[c - 1] < txfm_cache[i])
- txfm_cache[i] = rd[c - 1];
- }
- }
- } else {
- int64_t diff, base_rd;
- int cost4x4 = vp9_cost_bit(cpi->common.prob_tx[0], 0);
- int cost8x8 = vp9_cost_bit(cpi->common.prob_tx[0], 1);
-
- if (cpi->common.txfm_mode == TX_MODE_SELECT) {
- int64_t rd4x4[4], rd8x8[4];
- int n4x4, n8x8, nmin;
- BEST_SEG_INFO bsi4x4, bsi8x8;
-
- /* factor in cost of cost4x4/8x8 in decision */
- vpx_memcpy(&bsi4x4, bsi, sizeof(*bsi));
- vpx_memcpy(&bsi8x8, bsi, sizeof(*bsi));
- rd_check_segment_txsize(cpi, x, &bsi4x4, segmentation,
- TX_4X4, NULL, rd4x4, &n4x4, seg_mvs);
- rd_check_segment_txsize(cpi, x, &bsi8x8, segmentation,
- TX_8X8, NULL, rd8x8, &n8x8, seg_mvs);
- if (bsi4x4.segment_num == segmentation) {
- bsi4x4.segment_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
- if (bsi4x4.segment_rd < bsi->segment_rd)
- vpx_memcpy(bsi, &bsi4x4, sizeof(*bsi));
- }
- if (bsi8x8.segment_num == segmentation) {
- bsi8x8.segment_rd += RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
- if (bsi8x8.segment_rd < bsi->segment_rd)
- vpx_memcpy(bsi, &bsi8x8, sizeof(*bsi));
- }
- n = n4x4 > n8x8 ? n4x4 : n8x8;
- if (n == c) {
- nmin = n4x4 < n8x8 ? n4x4 : n8x8;
- diff = rd8x8[nmin - 1] - rd4x4[nmin - 1];
- if (n == n4x4) {
- base_rd = rd4x4[c - 1];
- } else {
- base_rd = rd8x8[c - 1] - diff;
- }
- }
- } else {
- int64_t rd[4], otherrd[4];
-
- if (cpi->common.txfm_mode == ONLY_4X4) {
- rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, otherrd,
- rd, &n, seg_mvs);
- if (n == c) {
- base_rd = rd[c - 1];
- diff = otherrd[c - 1] - rd[c - 1];
- }
- } else /* use 8x8 transform */ {
- rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_8X8, otherrd,
- rd, &n, seg_mvs);
- if (n == c) {
- diff = rd[c - 1] - otherrd[c - 1];
- base_rd = otherrd[c - 1];
- }
- }
- }
-
- if (n == c) {
- if (base_rd < txfm_cache[ONLY_4X4]) {
- txfm_cache[ONLY_4X4] = base_rd;
- }
- if (base_rd + diff < txfm_cache[ALLOW_8X8]) {
- txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] =
- txfm_cache[ALLOW_32X32] = base_rd + diff;
- }
- if (diff < 0) {
- base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0);
- } else {
- base_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0);
- }
- if (base_rd < txfm_cache[TX_MODE_SELECT]) {
- txfm_cache[TX_MODE_SELECT] = base_rd;
- }
- }
- }
-}
-
-static INLINE void cal_step_param(int sr, int *sp) {
- int step = 0;
-
- if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP;
- else if (sr < 1) sr = 1;
-
- while (sr >>= 1)
- step++;
-
- *sp = MAX_MVSEARCH_STEPS - 1 - step;
+ int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
+ rd_check_segment_txsize(cpi, x, bsi, seg_mvs);
}
static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
@@ -2292,17 +1506,12 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
int *returnyrate,
int *returndistortion,
int *skippable, int mvthresh,
- int_mv seg_mvs[NB_PARTITIONINGS]
- [16 /* n_blocks */]
- [MAX_REF_FRAMES - 1],
- int64_t txfm_cache[NB_TXFM_MODES]) {
+ int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) {
int i;
BEST_SEG_INFO bsi;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
vpx_memset(&bsi, 0, sizeof(bsi));
- for (i = 0; i < NB_TXFM_MODES; i++)
- txfm_cache[i] = INT64_MAX;
bsi.segment_rd = best_rd;
bsi.ref_mv = best_ref_mv;
@@ -2310,121 +1519,41 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
bsi.mvp.as_int = best_ref_mv->as_int;
bsi.mvthresh = mvthresh;
bsi.mdcounts = mdcounts;
- bsi.txfm_size = TX_4X4;
- for (i = 0; i < 16; i++)
+ for (i = 0; i < 4; i++)
bsi.modes[i] = ZERO4X4;
- if (cpi->compressor_speed == 0) {
- /* for now, we will keep the original segmentation order
- when in best quality mode */
- rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
- seg_mvs[PARTITIONING_16X8], txfm_cache);
- rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
- seg_mvs[PARTITIONING_8X16], txfm_cache);
- rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,
- seg_mvs[PARTITIONING_8X8], txfm_cache);
- rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
- seg_mvs[PARTITIONING_4X4], txfm_cache);
- } else {
- int sr;
-
- rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8,
- seg_mvs[PARTITIONING_8X8], txfm_cache);
-
- if (bsi.segment_rd < best_rd) {
- int tmp_col_min = x->mv_col_min;
- int tmp_col_max = x->mv_col_max;
- int tmp_row_min = x->mv_row_min;
- int tmp_row_max = x->mv_row_max;
-
- vp9_clamp_mv_min_max(x, best_ref_mv);
-
- /* Get 8x8 result */
- bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int;
- bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int;
- bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int;
- bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int;
-
- /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range
- * according to the closeness of 2 MV. */
- /* block 8X16 */
- sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3,
- (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3);
- cal_step_param(sr, &bsi.sv_istep[0]);
-
- sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
- (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
- cal_step_param(sr, &bsi.sv_istep[1]);
-
- rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16,
- seg_mvs[PARTITIONING_8X16], txfm_cache);
-
- /* block 16X8 */
- sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3,
- (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3);
- cal_step_param(sr, &bsi.sv_istep[0]);
-
- sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3,
- (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3);
- cal_step_param(sr, &bsi.sv_istep[1]);
-
- rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8,
- seg_mvs[PARTITIONING_16X8], txfm_cache);
-
- /* If 8x8 is better than 16x8/8x16, then do 4x4 search */
- /* Not skip 4x4 if speed=0 (good quality) */
- if (cpi->sf.no_skip_block4x4_search ||
- bsi.segment_num == PARTITIONING_8X8) {
- /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */
- bsi.mvp.as_int = bsi.sv_mvp[0].as_int;
- rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4,
- seg_mvs[PARTITIONING_4X4], txfm_cache);
- }
-
- /* restore UMV window */
- x->mv_col_min = tmp_col_min;
- x->mv_col_max = tmp_col_max;
- x->mv_row_min = tmp_row_min;
- x->mv_row_max = tmp_row_max;
- }
- }
+ rd_check_segment(cpi, x, &bsi, seg_mvs);
/* set it to the best */
- for (i = 0; i < 16; i++) {
+ for (i = 0; i < 4; i++) {
x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int;
if (mbmi->second_ref_frame > 0)
x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int =
- bsi.second_mvs[i].as_int;
+ bsi.second_mvs[i].as_int;
x->e_mbd.plane[0].eobs[i] = bsi.eobs[i];
}
/* save partitions */
- mbmi->txfm_size = bsi.txfm_size;
- mbmi->partitioning = bsi.segment_num;
- x->partition_info->count = vp9_mbsplit_count[bsi.segment_num];
+ x->partition_info->count = 4;
for (i = 0; i < x->partition_info->count; i++) {
- int j;
-
- j = vp9_mbsplit_offset[bsi.segment_num][i];
-
- x->partition_info->bmi[i].mode = bsi.modes[j];
- x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv;
+ x->partition_info->bmi[i].mode = bsi.modes[i];
+ x->partition_info->bmi[i].mv.as_mv = bsi.mvs[i].as_mv;
if (mbmi->second_ref_frame > 0)
- x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[j].as_mv;
+ x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[i].as_mv;
}
/*
* used to set mbmi->mv.as_int
*/
- x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int;
+ x->partition_info->bmi[3].mv.as_int = bsi.mvs[3].as_int;
if (mbmi->second_ref_frame > 0)
- x->partition_info->bmi[15].second_mv.as_int = bsi.second_mvs[15].as_int;
+ x->partition_info->bmi[3].second_mv.as_int = bsi.second_mvs[3].as_int;
*returntotrate = bsi.r;
*returndistortion = bsi.d;
*returnyrate = bsi.segment_yrate;
- *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_MB16X16);
+ *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8);
return (int)(bsi.segment_rd);
}
@@ -2474,22 +1603,10 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
x->mv_best_ref_index[ref_frame] = best_index;
}
-static void set_i8x8_block_modes(MACROBLOCK *x, int modes[4]) {
- int i;
- MACROBLOCKD *xd = &x->e_mbd;
- for (i = 0; i < 4; i++) {
- int ib = vp9_i8x8_block[i];
- xd->mode_info_context->bmi[ib + 0].as_mode.first = modes[i];
- xd->mode_info_context->bmi[ib + 1].as_mode.first = modes[i];
- xd->mode_info_context->bmi[ib + 4].as_mode.first = modes[i];
- xd->mode_info_context->bmi[ib + 5].as_mode.first = modes[i];
- // printf("%d,%d,%d,%d\n",
- // modes[0], modes[1], modes[2], modes[3]);
- }
-}
-
extern void vp9_calc_ref_probs(int *count, vp9_prob *probs);
-static void estimate_curframe_refprobs(VP9_COMP *cpi, vp9_prob mod_refprobs[3], int pred_ref) {
+static void estimate_curframe_refprobs(VP9_COMP *cpi,
+ vp9_prob mod_refprobs[3],
+ int pred_ref) {
int norm_cnt[MAX_REF_FRAMES];
const int *const rfct = cpi->count_mb_ref_frame_usage;
int intra_count = rfct[INTRA_FRAME];
@@ -2539,7 +1656,8 @@ static INLINE unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1,
return (0x8000 + weight * cost1 + (0x10000 - weight) * cost0) >> 16;
}
-static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int *ref_costs) {
+static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
+ unsigned int *ref_costs) {
VP9_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
vp9_prob *mod_refprobs;
@@ -2588,10 +1706,10 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int
// Get the prediction for the current mb
cost = weighted_cost(&pred_prob, &new_pred_prob, 0,
pred_flag, cpi->seg0_progress);
- if (cost > 1024) cost = 768; // i.e. account for 4 bits max.
+ if (cost > 1024) cost = 768; // i.e. account for 4 bits max.
// for incorrectly predicted cases
- if (! pred_flag) {
+ if (!pred_flag) {
vp9_prob curframe_mod_refprobs[3];
if (cpi->seg0_progress) {
@@ -2699,6 +1817,51 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
frame_type, block_size);
}
+
+static enum BlockSize get_block_size(int bw, int bh) {
+ if (bw == 4 && bh == 4)
+ return BLOCK_4X4;
+
+ if (bw == 4 && bh == 8)
+ return BLOCK_4X8;
+
+ if (bw == 8 && bh == 4)
+ return BLOCK_8X4;
+
+ if (bw == 8 && bh == 8)
+ return BLOCK_8X8;
+
+ if (bw == 8 && bh == 16)
+ return BLOCK_8X16;
+
+ if (bw == 16 && bh == 8)
+ return BLOCK_16X8;
+
+ if (bw == 16 && bh == 16)
+ return BLOCK_16X16;
+
+ if (bw == 32 && bh == 32)
+ return BLOCK_32X32;
+
+ if (bw == 32 && bh == 16)
+ return BLOCK_32X16;
+
+ if (bw == 16 && bh == 32)
+ return BLOCK_16X32;
+
+ if (bw == 64 && bh == 32)
+ return BLOCK_64X32;
+
+ if (bw == 32 && bh == 64)
+ return BLOCK_32X64;
+
+ if (bw == 64 && bh == 64)
+ return BLOCK_64X64;
+
+ assert(0);
+ return -1;
+}
+
static void model_rd_from_var_lapndz(int var, int n, int qstep,
int *rate, int *dist) {
// This function models the rate and distortion for a Laplacian
@@ -2742,6 +1905,36 @@ static void model_rd_from_var_lapndz(int var, int n, int qstep,
vp9_clear_system_state();
}
+static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize,
+ MACROBLOCK *x, MACROBLOCKD *xd,
+ int *out_rate_sum, int *out_dist_sum) {
+ // Runs model_rd_from_var_lapndz() on each of the MAX_MB_PLANE planes,
+ // feeding it the variance between the plane source and dst buffers, and
+ // writes the summed rate to out_rate_sum and distortion to out_dist_sum.
+ unsigned int sse, var;  // sse is written by vf() below but never read
+ int i, rate_sum = 0, dist_sum = 0;
+ /* Note our transform coeffs are 8 times an orthogonal transform.
+ * Hence quantizer step is also 8 times. To get effective quantizer
+ * we need to divide by 8 (the >> 3 applied to dequant[1] below) before
+ * sending it to the modeling function.
+ */
+ for (i = 0; i < MAX_MB_PLANE; ++i) {
+ struct macroblock_plane *const p = &x->plane[i];
+ struct macroblockd_plane *const pd = &xd->plane[i];
+ /* Per-plane block dimensions: b_width_log2() and b_height_log2() of
+ * bsize, each reduced by the subsampling shift of this plane.
+ * 4 << bwl and 4 << bhl select the variance function via
+ * get_block_size(); 16 << (bwl + bhl) is their product and is passed
+ * as the n argument of the model.
+ */
+ const int bwl = b_width_log2(bsize) - pd->subsampling_x;
+ const int bhl = b_height_log2(bsize) - pd->subsampling_y;
+ const enum BlockSize bs = get_block_size(4 << bwl, 4 << bhl);
+ int rate, dist;
+ var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride, &sse);
+ model_rd_from_var_lapndz(var, 16 << (bwl + bhl),
+ pd->dequant[1] >> 3, &rate, &dist);
+
+ rate_sum += rate;
+ dist_sum += dist;
+ }
+
+ *out_rate_sum = rate_sum;
+ *out_dist_sum = dist_sum;
+}
+
static enum BlockSize y_to_uv_block_size(enum BlockSize bs) {
switch (bs) {
case BLOCK_64X64: return BLOCK_32X32;
@@ -2751,6 +1944,9 @@ static enum BlockSize y_to_uv_block_size(enum BlockSize bs) {
case BLOCK_32X16: return BLOCK_16X8;
case BLOCK_16X32: return BLOCK_8X16;
case BLOCK_16X16: return BLOCK_8X8;
+ case BLOCK_16X8: return BLOCK_8X4;
+ case BLOCK_8X16: return BLOCK_4X8;
+ case BLOCK_8X8: return BLOCK_4X4;
default:
assert(0);
return -1;
@@ -2766,6 +1962,9 @@ static enum BlockSize y_bsizet_to_block_size(BLOCK_SIZE_TYPE bs) {
case BLOCK_SIZE_SB32X16: return BLOCK_32X16;
case BLOCK_SIZE_SB16X32: return BLOCK_16X32;
case BLOCK_SIZE_MB16X16: return BLOCK_16X16;
+ case BLOCK_SIZE_SB16X8: return BLOCK_16X8;
+ case BLOCK_SIZE_SB8X16: return BLOCK_8X16;
+ case BLOCK_SIZE_SB8X8: return BLOCK_8X8;
default:
assert(0);
return -1;
@@ -2966,76 +2165,41 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
if (1) {
- int switchable_filter_index, newbest;
- int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0;
- int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0;
- for (switchable_filter_index = 0;
- switchable_filter_index < VP9_SWITCHABLE_FILTERS;
- ++switchable_filter_index) {
+ int i, newbest;
+ int tmp_rate_sum = 0, tmp_dist_sum = 0;
+ for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
int rs = 0;
- mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index];
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+ const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i];
+ const int is_intpel_interp = intpel_mv &&
+ vp9_is_interpolating_filter[filter];
+ mbmi->interp_filter = filter;
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
- if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ if (cm->mcomp_filter_type == SWITCHABLE) {
const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP);
const int m = vp9_switchable_interp_map[mbmi->interp_filter];
rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m];
}
- if (interpolating_intpel_seen && intpel_mv &&
- vp9_is_interpolating_filter[mbmi->interp_filter]) {
- rd = RDCOST(x->rdmult, x->rddiv,
- rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i,
- tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i);
+
+ if (interpolating_intpel_seen && is_intpel_interp) {
+ rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum);
} else {
- unsigned int sse, var;
- int tmp_rate_y, tmp_rate_u, tmp_rate_v;
- int tmp_dist_y, tmp_dist_u, tmp_dist_v;
+ int rate_sum = 0, dist_sum = 0;
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
- var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf,
- x->plane[0].src.stride,
- xd->plane[0].dst.buf,
- xd->plane[0].dst.stride,
- &sse);
- // Note our transform coeffs are 8 times an orthogonal transform.
- // Hence quantizer step is also 8 times. To get effective quantizer
- // we need to divide by 8 before sending to modeling function.
- model_rd_from_var_lapndz(var, MI_SIZE * bw * MI_SIZE * bh,
- xd->plane[0].dequant[1] >> 3,
- &tmp_rate_y, &tmp_dist_y);
- var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf,
- x->plane[1].src.stride,
- xd->plane[1].dst.buf,
- xd->plane[1].dst.stride,
- &sse);
- model_rd_from_var_lapndz(var, MI_UV_SIZE * bw * MI_UV_SIZE * bh,
- xd->plane[1].dequant[1] >> 3,
- &tmp_rate_u, &tmp_dist_u);
- var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf,
- x->plane[1].src.stride,
- xd->plane[2].dst.buf,
- xd->plane[1].dst.stride,
- &sse);
- model_rd_from_var_lapndz(var, MI_UV_SIZE * bw * MI_UV_SIZE * bh,
- xd->plane[2].dequant[1] >> 3,
- &tmp_rate_v, &tmp_dist_v);
- rd = RDCOST(x->rdmult, x->rddiv,
- rs + tmp_rate_y + tmp_rate_u + tmp_rate_v,
- tmp_dist_y + tmp_dist_u + tmp_dist_v);
- if (!interpolating_intpel_seen && intpel_mv &&
- vp9_is_interpolating_filter[mbmi->interp_filter]) {
- tmp_rate_y_i = tmp_rate_y;
- tmp_rate_u_i = tmp_rate_u;
- tmp_rate_v_i = tmp_rate_v;
- tmp_dist_y_i = tmp_dist_y;
- tmp_dist_u_i = tmp_dist_u;
- tmp_dist_v_i = tmp_dist_v;
+ model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
+ rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum);
+ if (!interpolating_intpel_seen && is_intpel_interp) {
+ tmp_rate_sum = rate_sum;
+ tmp_dist_sum = dist_sum;
}
}
- newbest = (switchable_filter_index == 0 || rd < best_rd);
+ newbest = i == 0 || rd < best_rd;
+
if (newbest) {
best_rd = rd;
*best_filter = mbmi->interp_filter;
}
+
if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
(cm->mcomp_filter_type != SWITCHABLE &&
cm->mcomp_filter_type == mbmi->interp_filter)) {
@@ -3050,21 +2214,19 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
sizeof(unsigned char) * MI_UV_SIZE * bw);
for (i = 0; i < MI_UV_SIZE * bh; ++i)
vpx_memcpy(tmp_vbuf + i * MI_UV_SIZE * bw,
- xd->plane[2].dst.buf + i * xd->plane[1].dst.stride,
+ xd->plane[2].dst.buf + i * xd->plane[2].dst.stride,
sizeof(unsigned char) * MI_UV_SIZE * bw);
pred_exists = 1;
}
- interpolating_intpel_seen |=
- intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter];
+ interpolating_intpel_seen |= is_intpel_interp;
}
}
// Set the appripriate filter
- if (cm->mcomp_filter_type != SWITCHABLE)
- mbmi->interp_filter = cm->mcomp_filter_type;
- else
- mbmi->interp_filter = *best_filter;
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+ mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
+ cm->mcomp_filter_type : *best_filter;
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+
if (pred_exists) {
// FIXME(rbultje): mb code still predicts into xd->predictor
@@ -3077,7 +2239,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
tmp_ubuf + i * bw * MI_UV_SIZE,
sizeof(unsigned char) * bw * MI_UV_SIZE);
for (i = 0; i < bh * MI_UV_SIZE; ++i)
- vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[1].dst.stride,
+ vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[2].dst.stride,
tmp_vbuf + i * bw * MI_UV_SIZE,
sizeof(unsigned char) * bw * MI_UV_SIZE);
} else {
@@ -3193,867 +2355,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
return this_rd; // if 0, this will be re-calculated by caller
}
-static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
- int mi_row, int mi_col,
- int *returnrate, int *returndistortion,
- int64_t *returnintra) {
- static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
- VP9_ALT_FLAG };
- VP9_COMMON *cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- union b_mode_info best_bmodes[16];
- MB_MODE_INFO best_mbmode;
- PARTITION_INFO best_partition;
- int_mv best_ref_mv, second_best_ref_mv;
- MB_PREDICTION_MODE this_mode;
- MB_PREDICTION_MODE best_mode = DC_PRED;
- MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi;
- int i, best_mode_index = 0;
- int mode8x8[4];
- unsigned char segment_id = mbmi->segment_id;
-
- int mode_index;
- int mdcounts[4];
- int rate, distortion;
- int rate2, distortion2;
- int64_t best_txfm_rd[NB_TXFM_MODES];
- int64_t best_txfm_diff[NB_TXFM_MODES];
- int64_t best_pred_diff[NB_PREDICTION_TYPES];
- int64_t best_pred_rd[NB_PREDICTION_TYPES];
- int64_t best_rd = INT64_MAX, best_intra_rd = INT64_MAX;
-#if CONFIG_COMP_INTERINTRA_PRED
- int is_best_interintra = 0;
- int64_t best_intra16_rd = INT64_MAX;
- int best_intra16_mode = DC_PRED;
-#if SEPARATE_INTERINTRA_UV
- int best_intra16_uv_mode = DC_PRED;
-#endif
-#endif
- int64_t best_overall_rd = INT64_MAX;
- INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE;
- INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE;
- int uv_intra_rate[2], uv_intra_distortion[2], uv_intra_rate_tokenonly[2];
- int uv_intra_skippable[2];
- MB_PREDICTION_MODE uv_intra_mode[2];
- int rate_y, UNINITIALIZED_IS_SAFE(rate_uv);
- int distortion_uv = INT_MAX;
- int64_t best_yrd = INT64_MAX;
-
- int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
- int frame_mdcounts[4][4];
- YV12_BUFFER_CONFIG yv12_mb[4];
-
- unsigned int ref_costs[MAX_REF_FRAMES];
- int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1];
-
- int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
- cpi->common.y_dc_delta_q);
- int64_t mode_distortions[MB_MODE_COUNT] = {-1};
- int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
- int ref_frame;
-
- struct scale_factors scale_factor[4];
-
- vpx_memset(mode8x8, 0, sizeof(mode8x8));
- vpx_memset(&frame_mv, 0, sizeof(frame_mv));
- vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
- vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
- vpx_memset(&x->mb_context[xd->sb_index][xd->mb_index], 0,
- sizeof(PICK_MODE_CONTEXT));
-
- x->mb_context[xd->sb_index][xd->mb_index].frames_with_high_error = 0;
- x->mb_context[xd->sb_index][xd->mb_index].modes_with_high_error = 0;
-
- for (i = 0; i < MAX_REF_FRAMES; i++)
- frame_mv[NEWMV][i].as_int = INVALID_MV;
- for (i = 0; i < NB_PREDICTION_TYPES; ++i)
- best_pred_rd[i] = INT64_MAX;
- for (i = 0; i < NB_TXFM_MODES; i++)
- best_txfm_rd[i] = INT64_MAX;
-
- for (i = 0; i < NB_PARTITIONINGS; i++) {
- int j, k;
-
- for (j = 0; j < 16; j++)
- for (k = 0; k < MAX_REF_FRAMES - 1; k++)
- seg_mvs[i][j][k].as_int = INVALID_MV;
- }
-
- if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- setup_buffer_inter(cpi, x, cpi->lst_fb_idx,
- LAST_FRAME, BLOCK_16X16, mi_row, mi_col,
- frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb, scale_factor);
- }
-
- if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- setup_buffer_inter(cpi, x, cpi->gld_fb_idx,
- GOLDEN_FRAME, BLOCK_16X16, mi_row, mi_col,
- frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb, scale_factor);
- }
-
- if (cpi->ref_frame_flags & VP9_ALT_FLAG) {
- setup_buffer_inter(cpi, x, cpi->alt_fb_idx,
- ALTREF_FRAME, BLOCK_16X16, mi_row, mi_col,
- frame_mv[NEARESTMV], frame_mv[NEARMV],
- frame_mdcounts, yv12_mb, scale_factor);
- }
-
- *returnintra = INT64_MAX;
-
- mbmi->ref_frame = INTRA_FRAME;
-
- /* Initialize zbin mode boost for uv costing */
- cpi->zbin_mode_boost = 0;
- vp9_update_zbin_extra(cpi, x);
-
- xd->mode_info_context->mbmi.mode = DC_PRED;
-
- for (i = 0; i <= TX_8X8; i++) {
- mbmi->txfm_size = i;
- rd_pick_intra_sbuv_mode(cpi, x, &uv_intra_rate[i],
- &uv_intra_rate_tokenonly[i],
- &uv_intra_distortion[i],
- &uv_intra_skippable[i],
- BLOCK_SIZE_MB16X16);
- uv_intra_mode[i] = mbmi->uv_mode;
- }
-
- // Get estimates of reference frame costs for each reference frame
- // that depend on the current prediction etc.
- estimate_ref_frame_costs(cpi, segment_id, ref_costs);
-
- for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
- int64_t this_rd = INT64_MAX;
- int disable_skip = 0, skippable = 0;
- int other_cost = 0;
- int compmode_cost = 0;
-#if CONFIG_COMP_INTERINTRA_PRED
- int compmode_interintra_cost = 0;
-#endif
- int mode_excluded = 0;
- int64_t txfm_cache[NB_TXFM_MODES] = { 0 };
- YV12_BUFFER_CONFIG *scaled_ref_frame;
-
- // These variables hold are rolling total cost and distortion for this mode
- rate2 = 0;
- distortion2 = 0;
- rate_y = 0;
- rate_uv = 0;
-
- x->skip = 0;
-
- this_mode = vp9_mode_order[mode_index].mode;
- mbmi->mode = this_mode;
- mbmi->uv_mode = DC_PRED;
- mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame;
- mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
-
- mbmi->interp_filter = cm->mcomp_filter_type;
-
- set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
- scale_factor);
-
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
-
- // Test best rd so far against threshold for trying this mode.
- if (best_rd <= cpi->rd_threshes[mode_index])
- continue;
-
- // Ensure that the references used by this mode are available.
- if (mbmi->ref_frame &&
- !(cpi->ref_frame_flags & flag_list[mbmi->ref_frame]))
- continue;
-
- if (mbmi->second_ref_frame > 0 &&
- !(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame]))
- continue;
-
- // only scale on zeromv.
- if (mbmi->ref_frame > 0 &&
- (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
- yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
- this_mode != ZEROMV)
- continue;
-
- if (mbmi->second_ref_frame > 0 &&
- (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
- yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
- this_mode != ZEROMV)
- continue;
-
- // current coding mode under rate-distortion optimization test loop
-#if CONFIG_COMP_INTERINTRA_PRED
- mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
- mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
-#endif
-
- // If the segment reference frame feature is enabled....
- // then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- !vp9_check_segref(xd, segment_id, mbmi->ref_frame)) {
- continue;
- // If the segment skip feature is enabled....
- // then do nothing if the current mode is not allowed..
- } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) &&
- (this_mode != ZEROMV)) {
- continue;
- // Disable this drop out case if the ref frame segment
- // level feature is enabled for this segment. This is to
- // prevent the possibility that the we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) {
- // Only consider ZEROMV/ALTREF_FRAME for alt ref frame overlay,
- // unless ARNR filtering is enabled in which case we want
- // an unfiltered alternative
- if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
- if (this_mode != ZEROMV ||
- mbmi->ref_frame != ALTREF_FRAME) {
- continue;
- }
- }
- }
-
- /* everything but intra */
- scaled_ref_frame = NULL;
- if (mbmi->ref_frame) {
- int ref = mbmi->ref_frame;
- int fb;
-
- best_ref_mv = mbmi->ref_mvs[ref][0];
- vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts));
-
- if (mbmi->ref_frame == LAST_FRAME) {
- fb = cpi->lst_fb_idx;
- } else if (mbmi->ref_frame == GOLDEN_FRAME) {
- fb = cpi->gld_fb_idx;
- } else {
- fb = cpi->alt_fb_idx;
- }
-
- if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb])
- scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]];
- }
-
- if (mbmi->second_ref_frame > 0) {
- int ref = mbmi->second_ref_frame;
-
- second_best_ref_mv = mbmi->ref_mvs[ref][0];
- }
-
- // TODO(jkoleszar) scaling/translation handled during creation of yv12_mb
- // currently.
- setup_pre_planes(xd, &yv12_mb[mbmi->ref_frame],
- mbmi->second_ref_frame > 0 ? &yv12_mb[mbmi->second_ref_frame] : NULL,
- 0, 0, NULL, NULL);
-
- // Experimental code. Special case for gf and arf zeromv modes.
- // Increase zbin size to suppress noise
- if (cpi->zbin_mode_boost_enabled) {
- if (vp9_mode_order[mode_index].ref_frame == INTRA_FRAME)
- cpi->zbin_mode_boost = 0;
- else {
- if (vp9_mode_order[mode_index].mode == ZEROMV) {
- if (vp9_mode_order[mode_index].ref_frame != LAST_FRAME)
- cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
- else
- cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
- } else if (vp9_mode_order[mode_index].mode == SPLITMV)
- cpi->zbin_mode_boost = 0;
- else
- cpi->zbin_mode_boost = MV_ZBIN_BOOST;
- }
-
- vp9_update_zbin_extra(cpi, x);
- }
-
- // Intra
- if (!mbmi->ref_frame) {
- switch (this_mode) {
- default:
- case V_PRED:
- case H_PRED:
- case D45_PRED:
- case D135_PRED:
- case D117_PRED:
- case D153_PRED:
- case D27_PRED:
- case D63_PRED:
- rate2 += intra_cost_penalty;
- case DC_PRED:
- case TM_PRED:
- mbmi->ref_frame = INTRA_FRAME;
- // FIXME compound intra prediction
- vp9_build_intra_predictors_sby_s(&x->e_mbd, BLOCK_SIZE_MB16X16);
- // vp9_build_intra_predictors_mby(&x->e_mbd);
- super_block_yrd(cpi, x, &rate_y, &distortion, &skippable,
- BLOCK_SIZE_MB16X16, txfm_cache);
- rate2 += rate_y;
- distortion2 += distortion;
- rate2 += x->mbmode_cost[xd->frame_type][mbmi->mode];
-
- rate2 += uv_intra_rate[mbmi->txfm_size != TX_4X4];
- rate_uv = uv_intra_rate_tokenonly[mbmi->txfm_size != TX_4X4];
- distortion2 += uv_intra_distortion[mbmi->txfm_size != TX_4X4];
- distortion_uv = uv_intra_distortion[mbmi->txfm_size != TX_4X4];
- skippable = skippable &&
- uv_intra_skippable[mbmi->txfm_size != TX_4X4];
- break;
- case I4X4_PRED: {
- int64_t tmp_rd;
-
- // Note the rate value returned here includes the cost of coding
- // the I4X4_PRED mode : x->mbmode_cost[xd->frame_type][I4X4_PRED];
- mbmi->txfm_size = TX_4X4;
- tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
- &distortion, best_yrd);
- rate2 += rate;
- rate2 += intra_cost_penalty;
- distortion2 += distortion;
-
- if (tmp_rd < best_yrd) {
- rate2 += uv_intra_rate[TX_4X4];
- rate_uv = uv_intra_rate_tokenonly[TX_4X4];
- distortion2 += uv_intra_distortion[TX_4X4];
- distortion_uv = uv_intra_distortion[TX_4X4];
- } else {
- this_rd = INT64_MAX;
- disable_skip = 1;
- }
- }
- break;
- case I8X8_PRED: {
- int64_t tmp_rd;
-
- tmp_rd = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate, &rate_y,
- &distortion, mode8x8,
- best_yrd, txfm_cache);
- rate2 += rate;
- rate2 += intra_cost_penalty;
- distortion2 += distortion;
-
- /* TODO: uv rate maybe over-estimated here since there is UV intra
- mode coded in I8X8_PRED prediction */
- if (tmp_rd < best_yrd) {
- rate2 += uv_intra_rate[TX_4X4];
- rate_uv = uv_intra_rate_tokenonly[TX_4X4];
- distortion2 += uv_intra_distortion[TX_4X4];
- distortion_uv = uv_intra_distortion[TX_4X4];
- } else {
- this_rd = INT64_MAX;
- disable_skip = 1;
- }
- }
- break;
- }
- }
- // Split MV. The code is very different from the other inter modes so
- // special case it.
- else if (this_mode == SPLITMV) {
- const int is_comp_pred = mbmi->second_ref_frame > 0;
- int64_t this_rd_thresh;
- int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
- int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
- int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0;
- int switchable_filter_index;
- int_mv *second_ref = is_comp_pred ? &second_best_ref_mv : NULL;
- union b_mode_info tmp_best_bmodes[16];
- MB_MODE_INFO tmp_best_mbmode;
- PARTITION_INFO tmp_best_partition;
- int pred_exists = 0;
-
- this_rd_thresh =
- (mbmi->ref_frame == LAST_FRAME) ?
- cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
- this_rd_thresh =
- (mbmi->ref_frame == GOLDEN_FRAME) ?
- cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
- xd->mode_info_context->mbmi.txfm_size = TX_4X4;
-
- for (switchable_filter_index = 0;
- switchable_filter_index < VP9_SWITCHABLE_FILTERS;
- ++switchable_filter_index) {
- int newbest;
- mbmi->interp_filter =
- vp9_switchable_interp[switchable_filter_index];
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
-
- tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
- second_ref, best_yrd, mdcounts,
- &rate, &rate_y, &distortion,
- &skippable,
- (int)this_rd_thresh, seg_mvs,
- txfm_cache);
- if (cpi->common.mcomp_filter_type == SWITCHABLE) {
- int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
- [vp9_get_pred_context(&cpi->common, xd,
- PRED_SWITCHABLE_INTERP)]
- [vp9_switchable_interp_map[mbmi->interp_filter]];
- tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
- }
- newbest = (tmp_rd < tmp_best_rd);
- if (newbest) {
- tmp_best_filter = mbmi->interp_filter;
- tmp_best_rd = tmp_rd;
- }
- if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
- (mbmi->interp_filter == cm->mcomp_filter_type &&
- cm->mcomp_filter_type != SWITCHABLE)) {
- tmp_best_rdu = tmp_rd;
- tmp_best_rate = rate;
- tmp_best_ratey = rate_y;
- tmp_best_distortion = distortion;
- tmp_best_skippable = skippable;
- vpx_memcpy(&tmp_best_mbmode, mbmi, sizeof(MB_MODE_INFO));
- vpx_memcpy(&tmp_best_partition, x->partition_info,
- sizeof(PARTITION_INFO));
- for (i = 0; i < 16; i++) {
- tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
- }
- pred_exists = 1;
- }
- } // switchable_filter_index loop
-
- mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
- tmp_best_filter : cm->mcomp_filter_type);
- vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
- if (!pred_exists) {
- // Handles the special case when a filter that is not in the
- // switchable list (bilinear, 6-tap) is indicated at the frame level
- tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv,
- second_ref, best_yrd, mdcounts,
- &rate, &rate_y, &distortion,
- &skippable,
- (int)this_rd_thresh, seg_mvs,
- txfm_cache);
- } else {
- if (cpi->common.mcomp_filter_type == SWITCHABLE) {
- int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
- [vp9_get_pred_context(&cpi->common, xd,
- PRED_SWITCHABLE_INTERP)]
- [vp9_switchable_interp_map[mbmi->interp_filter]];
- tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
- }
- tmp_rd = tmp_best_rdu;
- rate = tmp_best_rate;
- rate_y = tmp_best_ratey;
- distortion = tmp_best_distortion;
- skippable = tmp_best_skippable;
- vpx_memcpy(mbmi, &tmp_best_mbmode, sizeof(MB_MODE_INFO));
- vpx_memcpy(x->partition_info, &tmp_best_partition,
- sizeof(PARTITION_INFO));
- for (i = 0; i < 16; i++) {
- xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
- }
- }
-
- rate2 += rate;
- distortion2 += distortion;
-
- if (cpi->common.mcomp_filter_type == SWITCHABLE)
- rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
- [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
- [vp9_switchable_interp_map[mbmi->interp_filter]];
-
- // If even the 'Y' rd value of split is higher than best so far
- // then dont bother looking at UV
- if (tmp_rd < best_yrd) {
- int uv_skippable;
-
- vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
- BLOCK_SIZE_MB16X16);
-
- vp9_subtract_sbuv(x, BLOCK_SIZE_MB16X16);
-
- super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv,
- &uv_skippable, BLOCK_SIZE_MB16X16);
- rate2 += rate_uv;
- distortion2 += distortion_uv;
- skippable = skippable && uv_skippable;
- } else {
- this_rd = INT64_MAX;
- disable_skip = 1;
- }
-
- if (!mode_excluded) {
- if (is_comp_pred)
- mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
- else
- mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
- }
-
- compmode_cost =
- vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred);
- mbmi->mode = this_mode;
- }
- else {
-#if CONFIG_COMP_INTERINTRA_PRED
- if (mbmi->second_ref_frame == INTRA_FRAME) {
- if (best_intra16_mode == DC_PRED - 1) continue;
- mbmi->interintra_mode = best_intra16_mode;
-#if SEPARATE_INTERINTRA_UV
- mbmi->interintra_uv_mode = best_intra16_uv_mode;
-#else
- mbmi->interintra_uv_mode = best_intra16_mode;
-#endif
- }
-#endif
- this_rd = handle_inter_mode(cpi, x, BLOCK_SIZE_MB16X16,
- mdcounts, txfm_cache,
- &rate2, &distortion2, &skippable,
- &compmode_cost,
-#if CONFIG_COMP_INTERINTRA_PRED
- &compmode_interintra_cost,
-#endif
- &rate_y, &distortion,
- &rate_uv, &distortion_uv,
- &mode_excluded, &disable_skip,
- mode_index, &tmp_best_filter, frame_mv,
- scaled_ref_frame, mi_row, mi_col);
- if (this_rd == INT64_MAX)
- continue;
- }
-
-#if CONFIG_COMP_INTERINTRA_PRED
- if (cpi->common.use_interintra)
- rate2 += compmode_interintra_cost;
-#endif
-
- if (cpi->common.comp_pred_mode == HYBRID_PREDICTION)
- rate2 += compmode_cost;
-
- // Estimate the reference frame signaling cost and add it
- // to the rolling cost variable.
- rate2 += ref_costs[mbmi->ref_frame];
-
- if (!disable_skip) {
- // Test for the condition where skip block will be activated
- // because there are no non zero coefficients and make any
- // necessary adjustment for rate. Ignore if skip is coded at
- // segment level as the cost wont have been added in.
- int mb_skip_allowed;
-
- // Is Mb level skip allowed (i.e. not coded at segment level).
- mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP);
-
- if (skippable) {
- mbmi->mb_skip_coeff = 1;
-
- // Back out the coefficient coding costs
- rate2 -= (rate_y + rate_uv);
- // for best_yrd calculation
- rate_uv = 0;
-
- if (mb_skip_allowed) {
- int prob_skip_cost;
-
- // Cost the skip mb case
- vp9_prob skip_prob =
- vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP);
-
- if (skip_prob) {
- prob_skip_cost = vp9_cost_bit(skip_prob, 1);
- rate2 += prob_skip_cost;
- other_cost += prob_skip_cost;
- }
- }
- } else {
- // Add in the cost of the no skip flag.
- mbmi->mb_skip_coeff = 0;
- if (mb_skip_allowed) {
- int prob_skip_cost = vp9_cost_bit(
- vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0);
- rate2 += prob_skip_cost;
- other_cost += prob_skip_cost;
- }
- }
-
- // Calculate the final RD estimate for this mode.
- this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- }
-
- // Keep record of best intra distortion
- if ((mbmi->ref_frame == INTRA_FRAME) &&
- (this_rd < best_intra_rd)) {
- best_intra_rd = this_rd;
- *returnintra = distortion2;
- }
-#if CONFIG_COMP_INTERINTRA_PRED
- if ((mbmi->ref_frame == INTRA_FRAME) &&
- (this_mode <= TM_PRED) &&
- (this_rd < best_intra16_rd)) {
- best_intra16_rd = this_rd;
- best_intra16_mode = this_mode;
-#if SEPARATE_INTERINTRA_UV
- best_intra16_uv_mode = uv_intra_mode[mbmi->txfm_size != TX_4X4];
-#endif
- }
-#endif
-
- if (!disable_skip && mbmi->ref_frame == INTRA_FRAME)
- for (i = 0; i < NB_PREDICTION_TYPES; ++i)
- best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
-
- if (this_rd < best_overall_rd) {
- best_overall_rd = this_rd;
- best_filter = tmp_best_filter;
- best_mode = this_mode;
-#if CONFIG_COMP_INTERINTRA_PRED
- is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME);
-#endif
- }
-
- // Store the respective mode distortions for later use.
- // Store the respective mode distortions for later use.
- if (mode_distortions[this_mode] == -1
- || distortion2 < mode_distortions[this_mode]) {
- mode_distortions[this_mode] = distortion2;
- }
- if (frame_distortions[mbmi->ref_frame] == -1 ||
- distortion2 < frame_distortions[mbmi->ref_frame]) {
- frame_distortions[mbmi->ref_frame] = distortion2;
- }
-
- // Did this mode help.. i.e. is it the new best mode
- if (this_rd < best_rd || x->skip) {
- if (!mode_excluded) {
- /*
- if (mbmi->second_ref_frame == INTRA_FRAME) {
- printf("rd %d best %d bestintra16 %d\n", this_rd, best_rd, best_intra16_rd);
- }
- */
- // Note index of best mode so far
- best_mode_index = mode_index;
-
- if (this_mode <= I4X4_PRED) {
- if (mbmi->txfm_size != TX_4X4
- && this_mode != I4X4_PRED
- && this_mode != I8X8_PRED)
- mbmi->uv_mode = uv_intra_mode[TX_8X8];
- else
- mbmi->uv_mode = uv_intra_mode[TX_4X4];
- /* required for left and above block mv */
- mbmi->mv[0].as_int = 0;
- }
-
- other_cost += ref_costs[mbmi->ref_frame];
-
- /* Calculate the final y RD estimate for this mode */
- best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost),
- (distortion2 - distortion_uv));
-
- *returnrate = rate2;
- *returndistortion = distortion2;
- best_rd = this_rd;
- vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
- vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
-
- if ((this_mode == I4X4_PRED)
- || (this_mode == I8X8_PRED)
- || (this_mode == SPLITMV))
- for (i = 0; i < 16; i++) {
- best_bmodes[i] = xd->mode_info_context->bmi[i];
- }
- }
-
- // Testing this mode gave rise to an improvement in best error score.
- // Lower threshold a bit for next time
- cpi->rd_thresh_mult[mode_index] =
- (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ?
- cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
- cpi->rd_threshes[mode_index] =
- (cpi->rd_baseline_thresh[mode_index] >> 7) *
- cpi->rd_thresh_mult[mode_index];
- } else {
- // If the mode did not help improve the best error case then raise the
- // threshold for testing that mode next time around.
- cpi->rd_thresh_mult[mode_index] += 4;
-
- if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
- cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
-
- cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7)
- * cpi->rd_thresh_mult[mode_index];
- }
-
- /* keep record of best compound/single-only prediction */
- if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) {
- int64_t single_rd, hybrid_rd;
- int single_rate, hybrid_rate;
-
- if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
- single_rate = rate2 - compmode_cost;
- hybrid_rate = rate2;
- } else {
- single_rate = rate2;
- hybrid_rate = rate2 + compmode_cost;
- }
-
- single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
- hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
-
- if (mbmi->second_ref_frame <= INTRA_FRAME &&
- single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) {
- best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd;
- } else if (mbmi->second_ref_frame > INTRA_FRAME &&
- single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) {
- best_pred_rd[COMP_PREDICTION_ONLY] = single_rd;
- }
- if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION])
- best_pred_rd[HYBRID_PREDICTION] = hybrid_rd;
- }
-
- /* keep record of best txfm size */
- if (!mode_excluded && this_rd != INT64_MAX) {
- for (i = 0; i < NB_TXFM_MODES; i++) {
- int64_t adj_rd;
- if (this_mode != I4X4_PRED) {
- const int64_t txfm_mode_diff =
- txfm_cache[i] - txfm_cache[cm->txfm_mode];
- adj_rd = this_rd + txfm_mode_diff;
- } else {
- adj_rd = this_rd;
- }
- if (adj_rd < best_txfm_rd[i])
- best_txfm_rd[i] = adj_rd;
- }
- }
-
- if (x->skip && !mode_excluded)
- break;
- }
-
- assert((cm->mcomp_filter_type == SWITCHABLE) ||
- (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
- (best_mbmode.mode <= I4X4_PRED));
-
-#if CONFIG_COMP_INTERINTRA_PRED
- ++cpi->interintra_select_count[is_best_interintra];
-#endif
-
- // Accumulate filter usage stats
- // TODO(agrange): Use RD criteria to select interpolation filter mode.
- if (is_inter_mode(best_mode))
- ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]];
-
- // Reduce the activation RD thresholds for the best choice mode
- if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
- (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
- int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
-
- cpi->rd_thresh_mult[best_mode_index] =
- (cpi->rd_thresh_mult[best_mode_index] >=
- (MIN_THRESHMULT + best_adjustment)) ?
- cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
- cpi->rd_threshes[best_mode_index] =
- (cpi->rd_baseline_thresh[best_mode_index] >> 7) *
- cpi->rd_thresh_mult[best_mode_index];
- }
-
- // This code forces Altref,0,0 and skip for the frame that overlays a
- // an alrtef unless Altref is filtered. However, this is unsafe if
- // segment level coding of ref frame is enabled for this
- // segment.
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
- cpi->is_src_frame_alt_ref &&
- (cpi->oxcf.arnr_max_frames == 0) &&
- (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
- mbmi->mode = ZEROMV;
- if (cm->txfm_mode <= ALLOW_8X8)
- mbmi->txfm_size = cm->txfm_mode;
- else
- mbmi->txfm_size = TX_16X16;
- mbmi->ref_frame = ALTREF_FRAME;
- mbmi->mv[0].as_int = 0;
- mbmi->uv_mode = DC_PRED;
- mbmi->mb_skip_coeff = 1;
- mbmi->partitioning = 0;
- set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
- scale_factor);
-
- vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
- vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
- goto end;
- }
-
- // macroblock modes
- vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
- if (best_mbmode.mode == I4X4_PRED) {
- for (i = 0; i < 16; i++) {
- xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
- }
- }
-
- if (best_mbmode.mode == I8X8_PRED)
- set_i8x8_block_modes(x, mode8x8);
-
- if (best_mbmode.mode == SPLITMV) {
- for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].as_mv[0].as_int =
- best_bmodes[i].as_mv[0].as_int;
- if (mbmi->second_ref_frame > 0)
- for (i = 0; i < 16; i++)
- xd->mode_info_context->bmi[i].as_mv[1].as_int =
- best_bmodes[i].as_mv[1].as_int;
-
- vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
-
- mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int;
- mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int;
- }
-
- for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
- if (best_pred_rd[i] == INT64_MAX)
- best_pred_diff[i] = INT_MIN;
- else
- best_pred_diff[i] = best_rd - best_pred_rd[i];
- }
-
- if (!x->skip) {
- for (i = 0; i < NB_TXFM_MODES; i++) {
- if (best_txfm_rd[i] == INT64_MAX)
- best_txfm_diff[i] = 0;
- else
- best_txfm_diff[i] = best_rd - best_txfm_rd[i];
- }
- } else {
- vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
- }
-
-end:
-
- // Flag all modes that have a distortion thats > 2x the best we found at
- // this level.
- for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
- if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV
- || mode_index == SPLITMV)
- continue;
-
- if (mode_distortions[mode_index] > 2 * *returndistortion) {
- x->mb_context[xd->sb_index][xd->mb_index].modes_with_high_error |= (1
- << mode_index);
- }
- }
-
- // Flag all ref frames that have a distortion thats > 2x the best we found at
- // this level.
- for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
- if (frame_distortions[ref_frame] > 2 * *returndistortion) {
- x->mb_context[xd->sb_index][xd->mb_index].frames_with_high_error |= (1
- << ref_frame);
- }
- }
-
- set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
- scale_factor);
- store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index],
- best_mode_index, &best_partition,
- &mbmi->ref_mvs[mbmi->ref_frame][0],
- &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
- mbmi->second_ref_frame][0],
- best_pred_diff, best_txfm_diff);
-}
-
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int *returnrate, int *returndist,
BLOCK_SIZE_TYPE bsize,
@@ -4065,14 +2366,24 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int dist_y = 0, dist_uv;
int y_skip = 0, uv_skip;
int64_t txfm_cache[NB_TXFM_MODES], err;
+ MB_PREDICTION_MODE mode;
+ TX_SIZE txfm_size;
+ int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y;
+ int64_t err4x4 = INT64_MAX;
int i;
ctx->skip = 0;
xd->mode_info_context->mbmi.mode = DC_PRED;
err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
&dist_y, &y_skip, bsize, txfm_cache);
+ mode = xd->mode_info_context->mbmi.mode;
+ txfm_size = xd->mode_info_context->mbmi.txfm_size;
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
&dist_uv, &uv_skip, bsize);
+ if (bsize == BLOCK_SIZE_SB8X8)
+ err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y,
+ &rate4x4_y_tokenonly,
+ &dist4x4_y, err);
if (y_skip && uv_skip) {
*returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
@@ -4080,145 +2391,30 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returndist = dist_y + (dist_uv >> 2);
memset(ctx->txfm_rd_diff, 0,
sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff));
+ xd->mode_info_context->mbmi.mode = mode;
+ xd->mode_info_context->mbmi.txfm_size = txfm_size;
+ } else if (bsize == BLOCK_SIZE_SB8X8 && err4x4 < err) {
+ *returnrate = rate4x4_y + rate_uv +
+ vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
+ *returndist = dist4x4_y + (dist_uv >> 2);
+ for (i = 0; i < NB_TXFM_MODES; i++) {
+ ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]);
+ }
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
} else {
*returnrate = rate_y + rate_uv +
vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
*returndist = dist_y + (dist_uv >> 2);
for (i = 0; i < NB_TXFM_MODES; i++) {
- ctx->txfm_rd_diff[i] = err - txfm_cache[i];
+ ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]);
}
+ xd->mode_info_context->mbmi.txfm_size = txfm_size;
+ xd->mode_info_context->mbmi.mode = mode;
}
vpx_memcpy(&ctx->mic, xd->mode_info_context, sizeof(MODE_INFO));
}
-void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
- int *returnrate, int *returndist) {
- VP9_COMMON *cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
- int64_t error4x4, error16x16;
- int rate4x4, rate16x16 = 0, rateuv[2];
- int dist4x4 = 0, dist16x16 = 0, distuv[2];
- int rate;
- int rate4x4_tokenonly = 0;
- int rate16x16_tokenonly = 0;
- int rateuv_tokenonly[2];
- int64_t error8x8;
- int rate8x8_tokenonly=0;
- int rate8x8, dist8x8;
- int mode16x16;
- int mode8x8[4];
- int dist;
- int modeuv[2], uv_intra_skippable[2];
- int y_intra16x16_skippable = 0;
- int64_t txfm_cache[2][NB_TXFM_MODES];
- TX_SIZE txfm_size_16x16, txfm_size_8x8;
- int i;
-
- x->mb_context[xd->sb_index][xd->mb_index].skip = 0;
- mbmi->ref_frame = INTRA_FRAME;
- mbmi->mode = DC_PRED;
- for (i = 0; i <= TX_8X8; i++) {
- mbmi->txfm_size = i;
- rd_pick_intra_sbuv_mode(cpi, x, &rateuv[i], &rateuv_tokenonly[i],
- &distuv[i], &uv_intra_skippable[i],
- BLOCK_SIZE_MB16X16);
- modeuv[i] = mbmi->uv_mode;
- }
-
- // current macroblock under rate-distortion optimization test loop
- error16x16 = rd_pick_intra_sby_mode(cpi, x, &rate16x16,
- &rate16x16_tokenonly, &dist16x16,
- &y_intra16x16_skippable,
- BLOCK_SIZE_MB16X16, txfm_cache[1]);
- mode16x16 = mbmi->mode;
- txfm_size_16x16 = mbmi->txfm_size;
- if (y_intra16x16_skippable &&
- ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) ||
- (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) {
- error16x16 -= RDCOST(x->rdmult, x->rddiv, rate16x16_tokenonly, 0);
- rate16x16 -= rate16x16_tokenonly;
- }
- for (i = 0; i < NB_TXFM_MODES; i++) {
- txfm_cache[0][i] = error16x16 - txfm_cache[1][cm->txfm_mode] +
- txfm_cache[1][i];
- }
-
- error8x8 = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate8x8,
- &rate8x8_tokenonly,
- &dist8x8, mode8x8,
- error16x16, txfm_cache[1]);
- txfm_size_8x8 = mbmi->txfm_size;
- for (i = 0; i < NB_TXFM_MODES; i++) {
- int64_t tmp_rd = error8x8 - txfm_cache[1][cm->txfm_mode] + txfm_cache[1][i];
- if (tmp_rd < txfm_cache[0][i])
- txfm_cache[0][i] = tmp_rd;
- }
-
- mbmi->txfm_size = TX_4X4;
- error4x4 = rd_pick_intra4x4mby_modes(cpi, x,
- &rate4x4, &rate4x4_tokenonly,
- &dist4x4, error16x16);
- for (i = 0; i < NB_TXFM_MODES; i++) {
- if (error4x4 < txfm_cache[0][i])
- txfm_cache[0][i] = error4x4;
- }
-
- mbmi->mb_skip_coeff = 0;
- if (y_intra16x16_skippable &&
- ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) ||
- (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) {
- mbmi->mb_skip_coeff = 1;
- mbmi->mode = mode16x16;
- mbmi->uv_mode = modeuv[cm->txfm_mode != ONLY_4X4];
- rate = rate16x16 + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
- dist = dist16x16;
- rate += rateuv[cm->txfm_mode != ONLY_4X4] -
- rateuv_tokenonly[cm->txfm_mode != ONLY_4X4];
- dist += (distuv[cm->txfm_mode != ONLY_4X4] >> 2);
- mbmi->txfm_size = txfm_size_16x16;
- } else if (error8x8 > error16x16) {
- if (error4x4 < error16x16) {
- rate = rateuv[TX_4X4] + rate4x4;
- mbmi->mode = I4X4_PRED;
- mbmi->txfm_size = TX_4X4;
- dist = dist4x4 + (distuv[TX_4X4] >> 2);
- mbmi->uv_mode = modeuv[TX_4X4];
- } else {
- mbmi->txfm_size = txfm_size_16x16;
- mbmi->mode = mode16x16;
- rate = rate16x16 + rateuv[mbmi->txfm_size != TX_4X4];
- dist = dist16x16 + (distuv[mbmi->txfm_size != TX_4X4] >> 2);
- mbmi->uv_mode = modeuv[mbmi->txfm_size != TX_4X4];
- }
- rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
- } else {
- if (error4x4 < error8x8) {
- rate = rateuv[TX_4X4] + rate4x4;
- mbmi->mode = I4X4_PRED;
- mbmi->txfm_size = TX_4X4;
- dist = dist4x4 + (distuv[TX_4X4] >> 2);
- mbmi->uv_mode = modeuv[TX_4X4];
- } else {
- mbmi->mode = I8X8_PRED;
- mbmi->txfm_size = txfm_size_8x8;
- set_i8x8_block_modes(x, mode8x8);
- rate = rate8x8 + rateuv[TX_4X4];
- dist = dist8x8 + (distuv[TX_4X4] >> 2);
- }
- rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
- }
-
- for (i = 0; i < NB_TXFM_MODES; i++) {
- x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] =
- txfm_cache[0][cm->txfm_mode] - txfm_cache[0][i];
- }
-
- *returnrate = rate;
- *returndist = dist;
-}
-
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int mi_row, int mi_col,
int *returnrate,
@@ -4272,7 +2468,18 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
unsigned int mode_mask = 0;
int64_t mode_distortions[MB_MODE_COUNT] = {-1};
int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
+ int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
+ cpi->common.y_dc_delta_q);
+ int_mv seg_mvs[4][MAX_REF_FRAMES - 1];
+ union b_mode_info best_bmodes[4];
+ PARTITION_INFO best_partition;
+
+ for (i = 0; i < 4; i++) {
+ int j;
+ for (j = 0; j < MAX_REF_FRAMES - 1; j++)
+ seg_mvs[i][j].as_int = INVALID_MV;
+ }
// Everywhere the flag is set the error is much higher than its neighbors.
ctx->frames_with_high_error = 0;
ctx->modes_with_high_error = 0;
@@ -4330,7 +2537,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->Speed == 0
|| (cpi->Speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) {
mbmi->mode = DC_PRED;
- for (i = 0; i <= ((bsize < BLOCK_SIZE_SB64X64) ? TX_16X16 : TX_32X32);
+ for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 :
+ (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 :
+ (bsize < BLOCK_SIZE_SB64X64 ? TX_16X16 : TX_32X32)));
i++) {
mbmi->txfm_size = i;
rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i],
@@ -4362,6 +2571,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
x->skip = 0;
this_mode = vp9_mode_order[mode_index].mode;
ref_frame = vp9_mode_order[mode_index].ref_frame;
+
if (!(ref_frame == INTRA_FRAME
|| (cpi->ref_frame_flags & flag_list[ref_frame]))) {
continue;
@@ -4382,6 +2592,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame = ref_frame;
mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
+
+ // TODO(jingning, jkoleszar): scaling reference frame not supported for
+ // SPLITMV.
+ if (mbmi->ref_frame > 0 &&
+ (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode == SPLITMV)
+ continue;
+
+ if (mbmi->second_ref_frame > 0 &&
+ (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 ||
+ yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) &&
+ this_mode == SPLITMV)
+ continue;
+
set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
scale_factor);
comp_pred = mbmi->second_ref_frame > INTRA_FRAME;
@@ -4400,9 +2625,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
// continue;
- if (this_mode == I8X8_PRED ||
- this_mode == I4X4_PRED ||
- this_mode == SPLITMV)
+ if (bsize != BLOCK_SIZE_SB8X8 &&
+ (this_mode == I4X4_PRED || this_mode == SPLITMV))
continue;
// if (vp9_mode_order[mode_index].second_ref_frame == INTRA_FRAME)
// continue;
@@ -4465,13 +2689,33 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- if (ref_frame == INTRA_FRAME) {
+ if (this_mode == I4X4_PRED) {
+ int rate;
+
+ // Note the rate value returned here includes the cost of coding
+ // the I4X4_PRED mode : x->mbmode_cost[xd->frame_type][I4X4_PRED];
+ assert(bsize == BLOCK_SIZE_SB8X8);
+ mbmi->txfm_size = TX_4X4;
+ rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y,
+ &distortion_y, INT64_MAX);
+ rate2 += rate;
+ rate2 += intra_cost_penalty;
+ distortion2 += distortion_y;
+
+ rate2 += rate_uv_intra[TX_4X4];
+ rate_uv = rate_uv_intra[TX_4X4];
+ distortion2 += dist_uv[TX_4X4];
+ distortion_uv = dist_uv[TX_4X4];
+ mbmi->uv_mode = mode_uv[TX_4X4];
+ } else if (ref_frame == INTRA_FRAME) {
TX_SIZE uv_tx;
vp9_build_intra_predictors_sby_s(xd, bsize);
super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
bsize, txfm_cache);
uv_tx = mbmi->txfm_size;
+ if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8)
+ uv_tx = TX_4X4;
if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16)
uv_tx = TX_8X8;
else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32)
@@ -4483,7 +2727,137 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->uv_mode = mode_uv[uv_tx];
rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv;
+ if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
+ rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
+ } else if (this_mode == SPLITMV) {
+ const int is_comp_pred = mbmi->second_ref_frame > 0;
+ int rate, distortion;
+ int64_t this_rd_thresh;
+ int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
+ int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
+ int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0;
+ int switchable_filter_index;
+ int_mv *second_ref = is_comp_pred ?
+ &mbmi->ref_mvs[mbmi->second_ref_frame][0] : NULL;
+ union b_mode_info tmp_best_bmodes[16];
+ MB_MODE_INFO tmp_best_mbmode;
+ PARTITION_INFO tmp_best_partition;
+ int pred_exists = 0;
+ int uv_skippable;
+
+ this_rd_thresh = (mbmi->ref_frame == LAST_FRAME) ?
+ cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA];
+ this_rd_thresh = (mbmi->ref_frame == GOLDEN_FRAME) ?
+ cpi->rd_threshes[THR_NEWG] : this_rd_thresh;
+ xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+
+ for (switchable_filter_index = 0;
+ switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+ ++switchable_filter_index) {
+ int newbest;
+ mbmi->interp_filter =
+ vp9_switchable_interp[switchable_filter_index];
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+ tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
+ &mbmi->ref_mvs[mbmi->ref_frame][0],
+ second_ref, INT64_MAX, mdcounts,
+ &rate, &rate_y, &distortion,
+ &skippable,
+ (int)this_rd_thresh, seg_mvs);
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
+ [vp9_get_pred_context(&cpi->common, xd,
+ PRED_SWITCHABLE_INTERP)]
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
+ tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0);
+ }
+ newbest = (tmp_rd < tmp_best_rd);
+ if (newbest) {
+ tmp_best_filter = mbmi->interp_filter;
+ tmp_best_rd = tmp_rd;
+ }
+ if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
+ (mbmi->interp_filter == cm->mcomp_filter_type &&
+ cm->mcomp_filter_type != SWITCHABLE)) {
+ tmp_best_rdu = tmp_rd;
+ tmp_best_rate = rate;
+ tmp_best_ratey = rate_y;
+ tmp_best_distortion = distortion;
+ tmp_best_skippable = skippable;
+ vpx_memcpy(&tmp_best_mbmode, mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&tmp_best_partition, x->partition_info,
+ sizeof(PARTITION_INFO));
+ for (i = 0; i < 4; i++) {
+ tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
+ }
+ pred_exists = 1;
+ }
+ } // switchable_filter_index loop
+
+ mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
+ tmp_best_filter : cm->mcomp_filter_type);
+ vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+ if (!pred_exists) {
+ // Handles the special case when a filter that is not in the
+ // switchable list (bilinear, 6-tap) is indicated at the frame level
+ tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
+ &mbmi->ref_mvs[mbmi->ref_frame][0],
+ second_ref, INT64_MAX, mdcounts,
+ &rate, &rate_y, &distortion,
+ &skippable,
+ (int)this_rd_thresh, seg_mvs);
+ } else {
+ if (cpi->common.mcomp_filter_type == SWITCHABLE) {
+ int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
+ [vp9_get_pred_context(&cpi->common, xd,
+ PRED_SWITCHABLE_INTERP)]
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
+ tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
+ }
+ tmp_rd = tmp_best_rdu;
+ rate = tmp_best_rate;
+ rate_y = tmp_best_ratey;
+ distortion = tmp_best_distortion;
+ skippable = tmp_best_skippable;
+ vpx_memcpy(mbmi, &tmp_best_mbmode, sizeof(MB_MODE_INFO));
+ vpx_memcpy(x->partition_info, &tmp_best_partition,
+ sizeof(PARTITION_INFO));
+ for (i = 0; i < 4; i++) {
+ xd->mode_info_context->bmi[i] = tmp_best_bmodes[i];
+ }
+ }
+
+ rate2 += rate;
+ distortion2 += distortion;
+
+ if (cpi->common.mcomp_filter_type == SWITCHABLE)
+ rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs
+ [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)]
+ [vp9_switchable_interp_map[mbmi->interp_filter]];
+
+ // If even the 'Y' rd value of split is higher than best so far
+ // then dont bother looking at UV
+ vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
+ bsize);
+ vp9_subtract_sbuv(x, bsize);
+ super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv,
+ &uv_skippable, bsize, TX_4X4);
+ rate2 += rate_uv;
+ distortion2 += distortion_uv;
+ skippable = skippable && uv_skippable;
+
+ if (!mode_excluded) {
+ if (is_comp_pred)
+ mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
+ else
+ mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
+ }
+
+ compmode_cost =
+ vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred);
+ mbmi->mode = this_mode;
} else {
YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
int fb;
@@ -4640,6 +3014,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
*returndistortion = distortion2;
best_rd = this_rd;
vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO));
+ vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
+
+ if (this_mode == I4X4_PRED || this_mode == SPLITMV) {
+ for (i = 0; i < 4; i++) {
+ best_bmodes[i] = xd->mode_info_context->bmi[i];
+ }
+ }
}
#if 0
// Testing this mode gave rise to an improvement in best error score.
@@ -4693,6 +3074,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
/* keep record of best txfm size */
+ if (bsize < BLOCK_SIZE_SB32X32) {
+ if (bsize < BLOCK_SIZE_MB16X16) {
+ if (this_mode == SPLITMV || this_mode == I4X4_PRED)
+ txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4];
+ txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8];
+ }
+ txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16];
+ }
if (!mode_excluded && this_rd != INT64_MAX) {
for (i = 0; i < NB_TXFM_MODES; i++) {
int64_t adj_rd;
@@ -4769,13 +3158,18 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
(best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
mbmi->mode = ZEROMV;
mbmi->ref_frame = ALTREF_FRAME;
- mbmi->second_ref_frame = INTRA_FRAME;
+ mbmi->second_ref_frame = NONE;
mbmi->mv[0].as_int = 0;
mbmi->uv_mode = DC_PRED;
mbmi->mb_skip_coeff = 1;
- mbmi->partitioning = 0;
- mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ?
- TX_32X32 : cm->txfm_mode;
+ if (cm->txfm_mode == TX_MODE_SELECT) {
+ if (bsize >= BLOCK_SIZE_SB32X32)
+ mbmi->txfm_size = TX_32X32;
+ else if (bsize >= BLOCK_SIZE_MB16X16)
+ mbmi->txfm_size = TX_16X16;
+ else
+ mbmi->txfm_size = TX_8X8;
+ }
vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff));
vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff));
@@ -4784,6 +3178,26 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// macroblock modes
vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+ if (best_mbmode.mode == I4X4_PRED) {
+ for (i = 0; i < 4; i++) {
+ xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode;
+ }
+ }
+
+ if (best_mbmode.mode == SPLITMV) {
+ for (i = 0; i < 4; i++)
+ xd->mode_info_context->bmi[i].as_mv[0].as_int =
+ best_bmodes[i].as_mv[0].as_int;
+ if (mbmi->second_ref_frame > 0)
+ for (i = 0; i < 4; i++)
+ xd->mode_info_context->bmi[i].as_mv[1].as_int =
+ best_bmodes[i].as_mv[1].as_int;
+
+ vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
+
+ mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int;
+ mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int;
+ }
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
if (best_pred_rd[i] == INT64_MAX)
@@ -4806,7 +3220,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
end:
set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame,
scale_factor);
- store_coding_context(x, ctx, best_mode_index, NULL,
+ store_coding_context(x, ctx, best_mode_index,
+ &best_partition,
&mbmi->ref_mvs[mbmi->ref_frame][0],
&mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
mbmi->second_ref_frame][0],
@@ -4814,41 +3229,3 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
return best_rd;
}
-
-void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
- int mi_row, int mi_col,
- int *totalrate, int *totaldist) {
- MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
- int rate, distortion;
- int64_t intra_error = 0;
- unsigned char *segment_id = &mbmi->segment_id;
-
- if (xd->segmentation_enabled)
- x->encode_breakout = cpi->segment_encode_breakout[*segment_id];
- else
- x->encode_breakout = cpi->oxcf.encode_breakout;
-
- // if (cpi->sf.RD)
- // For now this codebase is limited to a single rd encode path
- {
- int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
-
- rd_pick_inter_mode(cpi, x, mi_row, mi_col, &rate,
- &distortion, &intra_error);
-
- /* restore cpi->zbin_mode_boost_enabled */
- cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
- }
- // else
- // The non rd encode path has been deleted from this code base
- // to simplify development
- // vp9_pick_inter_mode
-
- // Store metrics so they can be added in to totals if this mode is picked
- x->mb_context[xd->sb_index][xd->mb_index].distortion = distortion;
- x->mb_context[xd->sb_index][xd->mb_index].intra_error = intra_error;
-
- *totalrate = rate;
- *totaldist = distortion;
-}
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index eef2a4fe9..dcf5d00e9 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -19,17 +19,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex);
void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex);
-void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
- int *r, int *d);
-
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int *r, int *d, BLOCK_SIZE_TYPE bsize,
PICK_MODE_CONTEXT *ctx);
-void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
- int mi_row, int mi_col,
- int *r, int *d);
-
int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int mi_row, int mi_col,
int *r, int *d, BLOCK_SIZE_TYPE bsize,
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 7f792ae2b..fe995ad72 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -16,18 +16,15 @@
#include "vp9/common/vp9_tile_common.h"
void vp9_enable_segmentation(VP9_PTR ptr) {
- VP9_COMP *cpi = (VP9_COMP *)(ptr);
+ VP9_COMP *cpi = (VP9_COMP *)ptr;
- // Set the appropriate feature bit
cpi->mb.e_mbd.segmentation_enabled = 1;
cpi->mb.e_mbd.update_mb_segmentation_map = 1;
cpi->mb.e_mbd.update_mb_segmentation_data = 1;
}
void vp9_disable_segmentation(VP9_PTR ptr) {
- VP9_COMP *cpi = (VP9_COMP *)(ptr);
-
- // Clear the appropriate feature bit
+ VP9_COMP *cpi = (VP9_COMP *)ptr;
cpi->mb.e_mbd.segmentation_enabled = 0;
}
@@ -60,61 +57,57 @@ void vp9_set_segment_data(VP9_PTR ptr,
}
// Based on set of segment counts calculate a probability tree
-static void calc_segtree_probs(MACROBLOCKD *xd,
- int *segcounts,
+static void calc_segtree_probs(MACROBLOCKD *xd, int *segcounts,
vp9_prob *segment_tree_probs) {
// Work out probabilities of each segment
- segment_tree_probs[0] =
- get_binary_prob(segcounts[0] + segcounts[1] + segcounts[2] + segcounts[3],
- segcounts[4] + segcounts[5] + segcounts[6] + segcounts[7]);
- segment_tree_probs[1] =
- get_binary_prob(segcounts[0] + segcounts[1], segcounts[2] + segcounts[3]);
- segment_tree_probs[2] = get_binary_prob(segcounts[0], segcounts[1]);
- segment_tree_probs[3] = get_binary_prob(segcounts[2], segcounts[3]);
- segment_tree_probs[4] =
- get_binary_prob(segcounts[4] + segcounts[5], segcounts[6] + segcounts[7]);
+ const int c01 = segcounts[0] + segcounts[1];
+ const int c23 = segcounts[2] + segcounts[3];
+ const int c45 = segcounts[4] + segcounts[5];
+ const int c67 = segcounts[6] + segcounts[7];
+
+ segment_tree_probs[0] = get_binary_prob(c01 + c23, c45 + c67);
+ segment_tree_probs[1] = get_binary_prob(c01, c23);
+ segment_tree_probs[2] = get_binary_prob(c45, c67);
+ segment_tree_probs[3] = get_binary_prob(segcounts[0], segcounts[1]);
+ segment_tree_probs[4] = get_binary_prob(segcounts[2], segcounts[3]);
segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]);
segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]);
}
// Based on set of segment counts and probabilities calculate a cost estimate
-static int cost_segmap(MACROBLOCKD *xd,
- int *segcounts,
- vp9_prob *probs) {
- int cost;
- int count1, count2;
+static int cost_segmap(MACROBLOCKD *xd, int *segcounts, vp9_prob *probs) {
+ const int c01 = segcounts[0] + segcounts[1];
+ const int c23 = segcounts[2] + segcounts[3];
+ const int c45 = segcounts[4] + segcounts[5];
+ const int c67 = segcounts[6] + segcounts[7];
+ const int c0123 = c01 + c23;
+ const int c4567 = c45 + c67;
// Cost the top node of the tree
- count1 = segcounts[0] + segcounts[1] + segcounts[2] + segcounts[3];
- count2 = segcounts[3] + segcounts[4] + segcounts[5] + segcounts[6];
- cost = count1 * vp9_cost_zero(probs[0]) +
- count2 * vp9_cost_one(probs[0]);
+ int cost = c0123 * vp9_cost_zero(probs[0]) +
+ c4567 * vp9_cost_one(probs[0]);
// Cost subsequent levels
- if (count1 > 0) {
- count1 = segcounts[0] + segcounts[1];
- count2 = segcounts[2] + segcounts[3];
- cost += count1 * vp9_cost_zero(probs[1]) +
- count2 * vp9_cost_one(probs[1]);
-
- if (count1 > 0)
- cost += segcounts[0] * vp9_cost_zero(probs[2]) +
- segcounts[1] * vp9_cost_one(probs[2]);
- if (count2 > 0)
- cost += segcounts[2] * vp9_cost_zero(probs[3]) +
- segcounts[3] * vp9_cost_one(probs[3]);
+ if (c0123 > 0) {
+ cost += c01 * vp9_cost_zero(probs[1]) +
+ c23 * vp9_cost_one(probs[1]);
+
+ if (c01 > 0)
+ cost += segcounts[0] * vp9_cost_zero(probs[3]) +
+ segcounts[1] * vp9_cost_one(probs[3]);
+ if (c23 > 0)
+ cost += segcounts[2] * vp9_cost_zero(probs[4]) +
+ segcounts[3] * vp9_cost_one(probs[4]);
}
- if (count2 > 0) {
- count1 = segcounts[4] + segcounts[5];
- count2 = segcounts[6] + segcounts[7];
- cost += count1 * vp9_cost_zero(probs[4]) +
- count2 * vp9_cost_one(probs[4]);
+ if (c4567 > 0) {
+ cost += c45 * vp9_cost_zero(probs[2]) +
+ c67 * vp9_cost_one(probs[2]);
- if (count1 > 0)
+ if (c45 > 0)
cost += segcounts[4] * vp9_cost_zero(probs[5]) +
segcounts[5] * vp9_cost_one(probs[5]);
- if (count2 > 0)
+ if (c67 > 0)
cost += segcounts[6] * vp9_cost_zero(probs[6]) +
segcounts[7] * vp9_cost_one(probs[6]);
}
@@ -130,11 +123,12 @@ static void count_segs(VP9_COMP *cpi,
int bw, int bh, int mi_row, int mi_col) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- const int segment_id = mi->mbmi.segment_id;
+ int segment_id;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
+ segment_id = mi->mbmi.segment_id;
xd->mode_info_context = mi;
set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw);
@@ -199,9 +193,11 @@ static void count_segs_sb(VP9_COMP *cpi, MODE_INFO *mi,
assert(bwl < bsl && bhl < bsl);
if (bsize == BLOCK_SIZE_SB64X64) {
subsize = BLOCK_SIZE_SB32X32;
- } else {
- assert(bsize == BLOCK_SIZE_SB32X32);
+ } else if (bsize == BLOCK_SIZE_SB32X32) {
subsize = BLOCK_SIZE_MB16X16;
+ } else {
+ assert(bsize == BLOCK_SIZE_MB16X16);
+ subsize = BLOCK_SIZE_SB8X8;
}
for (n = 0; n < 4; n++) {
@@ -238,10 +234,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
// Set default state for the segment tree probabilities and the
// temporal coding probabilities
- vpx_memset(xd->mb_segment_tree_probs, 255,
- sizeof(xd->mb_segment_tree_probs));
- vpx_memset(cm->segment_pred_probs, 255,
- sizeof(cm->segment_pred_probs));
+ vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
+ vpx_memset(cm->segment_pred_probs, 255, sizeof(cm->segment_pred_probs));
vpx_memset(no_pred_segcounts, 0, sizeof(no_pred_segcounts));
vpx_memset(t_unpred_seg_counts, 0, sizeof(t_unpred_seg_counts));
@@ -249,16 +243,15 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
// First of all generate stats regarding how well the last segment map
// predicts this one
-
for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) {
vp9_get_tile_col_offsets(cm, tile_col);
mi_ptr = cm->mi + cm->cur_tile_mi_col_start;
for (mi_row = 0; mi_row < cm->mi_rows;
- mi_row += (4 << CONFIG_SB8X8), mi_ptr += (4 << CONFIG_SB8X8) * mis) {
+ mi_row += 8, mi_ptr += 8 * mis) {
mi = mi_ptr;
for (mi_col = cm->cur_tile_mi_col_start;
mi_col < cm->cur_tile_mi_col_end;
- mi_col += (4 << CONFIG_SB8X8), mi += (4 << CONFIG_SB8X8)) {
+ mi_col += 8, mi += 8) {
count_segs_sb(cpi, mi, no_pred_segcounts, temporal_predictor_count,
t_unpred_seg_counts, mi_row, mi_col, BLOCK_SIZE_SB64X64);
}
@@ -279,27 +272,24 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
// Add in the cost of the signalling for each prediction context
for (i = 0; i < PREDICTION_PROBS; i++) {
- t_nopred_prob[i] = get_binary_prob(temporal_predictor_count[i][0],
- temporal_predictor_count[i][1]);
+ const int count0 = temporal_predictor_count[i][0];
+ const int count1 = temporal_predictor_count[i][1];
+
+ t_nopred_prob[i] = get_binary_prob(count0, count1);
// Add in the predictor signaling cost
- t_pred_cost += (temporal_predictor_count[i][0] *
- vp9_cost_zero(t_nopred_prob[i])) +
- (temporal_predictor_count[i][1] *
- vp9_cost_one(t_nopred_prob[i]));
+ t_pred_cost += count0 * vp9_cost_zero(t_nopred_prob[i]) +
+ count1 * vp9_cost_one(t_nopred_prob[i]);
}
}
// Now choose which coding method to use.
if (t_pred_cost < no_pred_cost) {
cm->temporal_update = 1;
- vpx_memcpy(xd->mb_segment_tree_probs,
- t_pred_tree, sizeof(t_pred_tree));
- vpx_memcpy(&cm->segment_pred_probs,
- t_nopred_prob, sizeof(t_nopred_prob));
+ vpx_memcpy(xd->mb_segment_tree_probs, t_pred_tree, sizeof(t_pred_tree));
+ vpx_memcpy(cm->segment_pred_probs, t_nopred_prob, sizeof(t_nopred_prob));
} else {
cm->temporal_update = 0;
- vpx_memcpy(xd->mb_segment_tree_probs,
- no_pred_tree, sizeof(no_pred_tree));
+ vpx_memcpy(xd->mb_segment_tree_probs, no_pred_tree, sizeof(no_pred_tree));
}
}
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 1e6b9840b..6bd8b5036 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -481,7 +481,7 @@ void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame,
// Note: this_frame->frame has been updated in the loop
// so it now points at the ARF frame.
half_gf_int = cpi->baseline_gf_interval >> 1;
- frames_after_arf = (int)(cpi->twopass.total_stats->count - this_frame - 1);
+ frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1);
switch (cpi->oxcf.arnr_type) {
case 1: // Backward filter
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 3c3367071..9a6598581 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -112,8 +112,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
PLANE_TYPE type = plane ? PLANE_TYPE_UV : PLANE_TYPE_Y_WITH_DC;
TX_SIZE tx_size = ss_txfrm_size / 2;
int dry_run = args->dry_run;
- int ib = old_block_idx_4x4(xd, b_width_log2(bsize) + b_height_log2(bsize),
- plane, block);
MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
int pt; /* near block/prev token context index */
@@ -158,7 +156,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
default:
case TX_4X4: {
tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
- get_tx_type_4x4(xd, ib) : DCT_DCT;
+ get_tx_type_4x4(xd, block) : DCT_DCT;
above_ec = A[0] != 0;
left_ec = L[0] != 0;
seg_eob = 16;
@@ -173,7 +171,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
}
case TX_8X8: {
const int sz = 1 + b_width_log2(sb_type);
- const int x = ib & ((1 << sz) - 1), y = ib - x;
+ const int x = block & ((1 << sz) - 1), y = block - x;
tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT;
above_ec = (A[0] + A[1]) != 0;
@@ -190,7 +188,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
}
case TX_16X16: {
const int sz = 2 + b_width_log2(sb_type);
- const int x = ib & ((1 << sz) - 1), y = ib - x;
+ const int x = block & ((1 << sz) - 1), y = block - x;
tx_type = (type == PLANE_TYPE_Y_WITH_DC) ?
get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT;
above_ec = (A[0] + A[1] + A[2] + A[3]) != 0;
@@ -376,7 +374,8 @@ int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
int result = 1;
struct is_skippable_args args = {xd, &result};
- foreach_transformed_block_in_plane(xd, bsize, 0, 0, is_skippable, &args);
+ foreach_transformed_block_in_plane(xd, bsize, 0,
+ is_skippable, &args);
return result;
}
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index c4c70df43..c2a600408 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -239,6 +239,32 @@ unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
return (var - (((unsigned int)avg * avg) >> 6));
}
+unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 5));
+}
+
+unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
+ int source_stride,
+ const uint8_t *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 5));
+}
+
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
diff --git a/vp9/encoder/x86/vp9_quantize_mmx.asm b/vp9/encoder/x86/vp9_quantize_mmx.asm
deleted file mode 100644
index 22e235610..000000000
--- a/vp9/encoder/x86/vp9_quantize_mmx.asm
+++ /dev/null
@@ -1,286 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-;int vp9_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
-; short *qcoeff_ptr,short *dequant_ptr,
-; short *scan_mask, short *round_ptr,
-; short *quant_ptr, short *dqcoeff_ptr);
-global sym(vp9_fast_quantize_b_impl_mmx) PRIVATE
-sym(vp9_fast_quantize_b_impl_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- push rsi
- push rdi
- ; end prolog
-
-
- mov rsi, arg(0) ;coeff_ptr
- movq mm0, [rsi]
-
- mov rax, arg(1) ;zbin_ptr
- movq mm1, [rax]
-
- movq mm3, mm0
- psraw mm0, 15
-
- pxor mm3, mm0
- psubw mm3, mm0 ; abs
-
- movq mm2, mm3
- pcmpgtw mm1, mm2
-
- pandn mm1, mm2
- movq mm3, mm1
-
- mov rdx, arg(6) ;quant_ptr
- movq mm1, [rdx]
-
- mov rcx, arg(5) ;round_ptr
- movq mm2, [rcx]
-
- paddw mm3, mm2
- pmulhuw mm3, mm1
-
- pxor mm3, mm0
- psubw mm3, mm0 ;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
- movq mm0, mm3
-
- movq [rdi], mm3
-
- mov rax, arg(3) ;dequant_ptr
- movq mm2, [rax]
-
- pmullw mm3, mm2
- mov rax, arg(7) ;dqcoeff_ptr
-
- movq [rax], mm3
-
- ; next 8
- movq mm4, [rsi+8]
-
- mov rax, arg(1) ;zbin_ptr
- movq mm5, [rax+8]
-
- movq mm7, mm4
- psraw mm4, 15
-
- pxor mm7, mm4
- psubw mm7, mm4 ; abs
-
- movq mm6, mm7
- pcmpgtw mm5, mm6
-
- pandn mm5, mm6
- movq mm7, mm5
-
- movq mm5, [rdx+8]
- movq mm6, [rcx+8]
-
- paddw mm7, mm6
- pmulhuw mm7, mm5
-
- pxor mm7, mm4
- psubw mm7, mm4;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
-
- movq mm1, mm7
- movq [rdi+8], mm7
-
- mov rax, arg(3) ;dequant_ptr
- movq mm6, [rax+8]
-
- pmullw mm7, mm6
- mov rax, arg(7) ;dqcoeff_ptr
-
- movq [rax+8], mm7
-
-
- ; next 8
- movq mm4, [rsi+16]
-
- mov rax, arg(1) ;zbin_ptr
- movq mm5, [rax+16]
-
- movq mm7, mm4
- psraw mm4, 15
-
- pxor mm7, mm4
- psubw mm7, mm4 ; abs
-
- movq mm6, mm7
- pcmpgtw mm5, mm6
-
- pandn mm5, mm6
- movq mm7, mm5
-
- movq mm5, [rdx+16]
- movq mm6, [rcx+16]
-
- paddw mm7, mm6
- pmulhuw mm7, mm5
-
- pxor mm7, mm4
- psubw mm7, mm4;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
-
- movq mm1, mm7
- movq [rdi+16], mm7
-
- mov rax, arg(3) ;dequant_ptr
- movq mm6, [rax+16]
-
- pmullw mm7, mm6
- mov rax, arg(7) ;dqcoeff_ptr
-
- movq [rax+16], mm7
-
-
- ; next 8
- movq mm4, [rsi+24]
-
- mov rax, arg(1) ;zbin_ptr
- movq mm5, [rax+24]
-
- movq mm7, mm4
- psraw mm4, 15
-
- pxor mm7, mm4
- psubw mm7, mm4 ; abs
-
- movq mm6, mm7
- pcmpgtw mm5, mm6
-
- pandn mm5, mm6
- movq mm7, mm5
-
- movq mm5, [rdx+24]
- movq mm6, [rcx+24]
-
- paddw mm7, mm6
- pmulhuw mm7, mm5
-
- pxor mm7, mm4
- psubw mm7, mm4;gain the sign back
-
- mov rdi, arg(2) ;qcoeff_ptr
-
- movq mm1, mm7
- movq [rdi+24], mm7
-
- mov rax, arg(3) ;dequant_ptr
- movq mm6, [rax+24]
-
- pmullw mm7, mm6
- mov rax, arg(7) ;dqcoeff_ptr
-
- movq [rax+24], mm7
-
-
-
- mov rdi, arg(4) ;scan_mask
- mov rsi, arg(2) ;qcoeff_ptr
-
- pxor mm5, mm5
- pxor mm7, mm7
-
- movq mm0, [rsi]
- movq mm1, [rsi+8]
-
- movq mm2, [rdi]
- movq mm3, [rdi+8];
-
- pcmpeqw mm0, mm7
- pcmpeqw mm1, mm7
-
- pcmpeqw mm6, mm6
- pxor mm0, mm6
-
- pxor mm1, mm6
- psrlw mm0, 15
-
- psrlw mm1, 15
- pmaddwd mm0, mm2
-
- pmaddwd mm1, mm3
- movq mm5, mm0
-
- paddd mm5, mm1
-
- movq mm0, [rsi+16]
- movq mm1, [rsi+24]
-
- movq mm2, [rdi+16]
- movq mm3, [rdi+24];
-
- pcmpeqw mm0, mm7
- pcmpeqw mm1, mm7
-
- pcmpeqw mm6, mm6
- pxor mm0, mm6
-
- pxor mm1, mm6
- psrlw mm0, 15
-
- psrlw mm1, 15
- pmaddwd mm0, mm2
-
- pmaddwd mm1, mm3
- paddd mm5, mm0
-
- paddd mm5, mm1
- movq mm0, mm5
-
- psrlq mm5, 32
- paddd mm0, mm5
-
- ; eob adjustment begins here
- movq rcx, mm0
- and rcx, 0xffff
-
- xor rdx, rdx
- sub rdx, rcx ; rdx=-rcx
-
- bsr rax, rcx
- inc rax
-
- sar rdx, 31
- and rax, rdx
- ; Substitute the sse assembly for the old mmx mixed assembly/C. The
- ; following is kept as reference
- ; movq rcx, mm0
- ; bsr rax, rcx
- ;
- ; mov eob, rax
- ; mov eee, rcx
- ;
- ;if(eee==0)
- ;{
- ; eob=-1;
- ;}
- ;else if(eee<0)
- ;{
- ; eob=15;
- ;}
- ;d->eob = eob+1;
-
- ; begin epilog
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
diff --git a/vp9/encoder/x86/vp9_quantize_sse2.asm b/vp9/encoder/x86/vp9_quantize_sse2.asm
deleted file mode 100644
index 700e64b1f..000000000
--- a/vp9/encoder/x86/vp9_quantize_sse2.asm
+++ /dev/null
@@ -1,379 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-
-; void vp9_regular_quantize_b_sse2 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-
-global sym(vp9_regular_quantize_b_sse2) PRIVATE
-sym(vp9_regular_quantize_b_sse2):
- push rbp
- mov rbp, rsp
- SAVE_XMM 7
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %if LIBVPX_YASM_WIN64
- push rdi
- push rsi
- %endif
-%endif
-
- ALIGN_STACK 16, rax
- %define zrun_zbin_boost 0 ; 8
- %define abs_minus_zbin 8 ; 32
- %define temp_qcoeff 40 ; 32
- %define qcoeff 72 ; 32
- %define stack_size 104
- sub rsp, stack_size
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %if LIBVPX_YASM_WIN64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rdx, [rdi + vp9_block_coeff] ; coeff_ptr
- mov rcx, [rdi + vp9_block_zbin] ; zbin_ptr
- movd xmm7, [rdi + vp9_block_zbin_extra] ; zbin_oq_value
-
- ; z
- movdqa xmm0, [rdx]
- movdqa xmm4, [rdx + 16]
- mov rdx, [rdi + vp9_block_round] ; round_ptr
-
- pshuflw xmm7, xmm7, 0
- punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value
-
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz
- psraw xmm0, 15
- psraw xmm4, 15
-
- ; (z ^ sz)
- pxor xmm1, xmm0
- pxor xmm5, xmm4
-
- ; x = abs(z)
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- movdqa xmm2, [rcx]
- movdqa xmm3, [rcx + 16]
- mov rcx, [rdi + vp9_block_quant] ; quant_ptr
-
- ; *zbin_ptr + zbin_oq_value
- paddw xmm2, xmm7
- paddw xmm3, xmm7
-
- ; x - (*zbin_ptr + zbin_oq_value)
- psubw xmm1, xmm2
- psubw xmm5, xmm3
- movdqa [rsp + abs_minus_zbin], xmm1
- movdqa [rsp + abs_minus_zbin + 16], xmm5
-
- ; add (zbin_ptr + zbin_oq_value) back
- paddw xmm1, xmm2
- paddw xmm5, xmm3
-
- movdqa xmm2, [rdx]
- movdqa xmm6, [rdx + 16]
-
- movdqa xmm3, [rcx]
- movdqa xmm7, [rcx + 16]
-
- ; x + round
- paddw xmm1, xmm2
- paddw xmm5, xmm6
-
- ; y = x * quant_ptr >> 16
- pmulhw xmm3, xmm1
- pmulhw xmm7, xmm5
-
- ; y += x
- paddw xmm1, xmm3
- paddw xmm5, xmm7
-
- movdqa [rsp + temp_qcoeff], xmm1
- movdqa [rsp + temp_qcoeff + 16], xmm5
-
- pxor xmm6, xmm6
- ; zero qcoeff
- movdqa [rsp + qcoeff], xmm6
- movdqa [rsp + qcoeff + 16], xmm6
-
- mov rdx, [rdi + vp9_block_zrun_zbin_boost] ; zbin_boost_ptr
- mov rax, [rdi + vp9_block_quant_shift] ; quant_shift_ptr
- mov [rsp + zrun_zbin_boost], rdx
-
-%macro ZIGZAG_LOOP 1
- ; x
- movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2]
-
- ; if (x >= zbin)
- sub cx, WORD PTR[rdx] ; x - zbin
- lea rdx, [rdx + 2] ; zbin_boost_ptr++
- jl .rq_zigzag_loop_%1 ; x < zbin
-
- movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2]
-
- ; downshift by quant_shift[rc]
- movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc]
- sar edi, cl ; also sets Z bit
- je .rq_zigzag_loop_%1 ; !y
- mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc]
- mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost
-.rq_zigzag_loop_%1:
-%endmacro
-; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c
-ZIGZAG_LOOP 0
-ZIGZAG_LOOP 1
-ZIGZAG_LOOP 4
-ZIGZAG_LOOP 8
-ZIGZAG_LOOP 5
-ZIGZAG_LOOP 2
-ZIGZAG_LOOP 3
-ZIGZAG_LOOP 6
-ZIGZAG_LOOP 9
-ZIGZAG_LOOP 12
-ZIGZAG_LOOP 13
-ZIGZAG_LOOP 10
-ZIGZAG_LOOP 7
-ZIGZAG_LOOP 11
-ZIGZAG_LOOP 14
-ZIGZAG_LOOP 15
-
- movdqa xmm2, [rsp + qcoeff]
- movdqa xmm3, [rsp + qcoeff + 16]
-
- mov rcx, [rsi + vp9_blockd_dequant] ; dequant_ptr
- mov rdi, [rsi + vp9_blockd_dqcoeff] ; dqcoeff_ptr
-
- ; y ^ sz
- pxor xmm2, xmm0
- pxor xmm3, xmm4
- ; x = (y ^ sz) - sz
- psubw xmm2, xmm0
- psubw xmm3, xmm4
-
- ; dequant
- movdqa xmm0, [rcx]
- movdqa xmm1, [rcx + 16]
-
- mov rcx, [rsi + vp9_blockd_qcoeff] ; qcoeff_ptr
-
- pmullw xmm0, xmm2
- pmullw xmm1, xmm3
-
- movdqa [rcx], xmm2 ; store qcoeff
- movdqa [rcx + 16], xmm3
- movdqa [rdi], xmm0 ; store dqcoeff
- movdqa [rdi + 16], xmm1
-
- ; select the last value (in zig_zag order) for EOB
- pcmpeqw xmm2, xmm6
- pcmpeqw xmm3, xmm6
- ; !
- pcmpeqw xmm6, xmm6
- pxor xmm2, xmm6
- pxor xmm3, xmm6
- ; mask inv_zig_zag
- pand xmm2, [GLOBAL(inv_zig_zag)]
- pand xmm3, [GLOBAL(inv_zig_zag + 16)]
- ; select the max value
- pmaxsw xmm2, xmm3
- pshufd xmm3, xmm2, 00001110b
- pmaxsw xmm2, xmm3
- pshuflw xmm3, xmm2, 00001110b
- pmaxsw xmm2, xmm3
- pshuflw xmm3, xmm2, 00000001b
- pmaxsw xmm2, xmm3
- movd eax, xmm2
- and eax, 0xff
- mov [rsi + vp9_blockd_eob], eax
-
- ; begin epilog
- add rsp, stack_size
- pop rsp
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- pop rdi
- %endif
-%endif
- RESTORE_GOT
- RESTORE_XMM
- pop rbp
- ret
-
-; void vp9_fast_quantize_b_sse2 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-
-global sym(vp9_fast_quantize_b_sse2) PRIVATE
-sym(vp9_fast_quantize_b_sse2):
- push rbp
- mov rbp, rsp
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %if LIBVPX_YASM_WIN64
- push rdi
- push rsi
- %else
- ; these registers are used for passing arguments
- %endif
-%endif
-
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %if LIBVPX_YASM_WIN64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rax, [rdi + vp9_block_coeff]
- mov rcx, [rdi + vp9_block_round]
- mov rdx, [rdi + vp9_block_quant_fast]
-
- ; z = coeff
- movdqa xmm0, [rax]
- movdqa xmm4, [rax + 16]
-
- ; dup z so we can save sz
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz = z >> 15
- psraw xmm0, 15
- psraw xmm4, 15
-
- ; x = abs(z) = (z ^ sz) - sz
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- ; x += round
- paddw xmm1, [rcx]
- paddw xmm5, [rcx + 16]
-
- mov rax, [rsi + vp9_blockd_qcoeff]
- mov rcx, [rsi + vp9_blockd_dequant]
- mov rdi, [rsi + vp9_blockd_dqcoeff]
-
- ; y = x * quant >> 16
- pmulhw xmm1, [rdx]
- pmulhw xmm5, [rdx + 16]
-
- ; x = (y ^ sz) - sz
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- ; qcoeff = x
- movdqa [rax], xmm1
- movdqa [rax + 16], xmm5
-
- ; x * dequant
- movdqa xmm2, xmm1
- movdqa xmm3, xmm5
- pmullw xmm2, [rcx]
- pmullw xmm3, [rcx + 16]
-
- ; dqcoeff = x * dequant
- movdqa [rdi], xmm2
- movdqa [rdi + 16], xmm3
-
- pxor xmm4, xmm4 ;clear all bits
- pcmpeqw xmm1, xmm4
- pcmpeqw xmm5, xmm4
-
- pcmpeqw xmm4, xmm4 ;set all bits
- pxor xmm1, xmm4
- pxor xmm5, xmm4
-
- pand xmm1, [GLOBAL(inv_zig_zag)]
- pand xmm5, [GLOBAL(inv_zig_zag + 16)]
-
- pmaxsw xmm1, xmm5
-
- ; now down to 8
- pshufd xmm5, xmm1, 00001110b
-
- pmaxsw xmm1, xmm5
-
- ; only 4 left
- pshuflw xmm5, xmm1, 00001110b
-
- pmaxsw xmm1, xmm5
-
- ; okay, just 2!
- pshuflw xmm5, xmm1, 00000001b
-
- pmaxsw xmm1, xmm5
-
- movd eax, xmm1
- and eax, 0xff
- mov [rsi + vp9_blockd_eob], eax
-
- ; begin epilog
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- pop rdi
- %endif
-%endif
-
- RESTORE_GOT
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-inv_zig_zag:
- dw 0x0001, 0x0002, 0x0006, 0x0007
- dw 0x0003, 0x0005, 0x0008, 0x000d
- dw 0x0004, 0x0009, 0x000c, 0x000e
- dw 0x000a, 0x000b, 0x000f, 0x0010
diff --git a/vp9/encoder/x86/vp9_quantize_sse4.asm b/vp9/encoder/x86/vp9_quantize_sse4.asm
deleted file mode 100644
index 4c14e5ffe..000000000
--- a/vp9/encoder/x86/vp9_quantize_sse4.asm
+++ /dev/null
@@ -1,253 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-
-; void vp9_regular_quantize_b_sse4 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-
-global sym(vp9_regular_quantize_b_sse4) PRIVATE
-sym(vp9_regular_quantize_b_sse4):
-
-%if ABI_IS_32BIT
- push rbp
- mov rbp, rsp
- GET_GOT rbx
- push rdi
- push rsi
-
- ALIGN_STACK 16, rax
- %define qcoeff 0 ; 32
- %define stack_size 32
- sub rsp, stack_size
-%else
- %if LIBVPX_YASM_WIN64
- SAVE_XMM 8, u
- push rdi
- push rsi
- %endif
-%endif
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %if LIBVPX_YASM_WIN64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rax, [rdi + vp9_block_coeff]
- mov rcx, [rdi + vp9_block_zbin]
- mov rdx, [rdi + vp9_block_round]
- movd xmm7, [rdi + vp9_block_zbin_extra]
-
- ; z
- movdqa xmm0, [rax]
- movdqa xmm1, [rax + 16]
-
- ; duplicate zbin_oq_value
- pshuflw xmm7, xmm7, 0
- punpcklwd xmm7, xmm7
-
- movdqa xmm2, xmm0
- movdqa xmm3, xmm1
-
- ; sz
- psraw xmm0, 15
- psraw xmm1, 15
-
- ; (z ^ sz)
- pxor xmm2, xmm0
- pxor xmm3, xmm1
-
- ; x = abs(z)
- psubw xmm2, xmm0
- psubw xmm3, xmm1
-
- ; zbin
- movdqa xmm4, [rcx]
- movdqa xmm5, [rcx + 16]
-
- ; *zbin_ptr + zbin_oq_value
- paddw xmm4, xmm7
- paddw xmm5, xmm7
-
- movdqa xmm6, xmm2
- movdqa xmm7, xmm3
-
- ; x - (*zbin_ptr + zbin_oq_value)
- psubw xmm6, xmm4
- psubw xmm7, xmm5
-
- ; round
- movdqa xmm4, [rdx]
- movdqa xmm5, [rdx + 16]
-
- mov rax, [rdi + vp9_block_quant_shift]
- mov rcx, [rdi + vp9_block_quant]
- mov rdx, [rdi + vp9_block_zrun_zbin_boost]
-
- ; x + round
- paddw xmm2, xmm4
- paddw xmm3, xmm5
-
- ; quant
- movdqa xmm4, [rcx]
- movdqa xmm5, [rcx + 16]
-
- ; y = x * quant_ptr >> 16
- pmulhw xmm4, xmm2
- pmulhw xmm5, xmm3
-
- ; y += x
- paddw xmm2, xmm4
- paddw xmm3, xmm5
-
- pxor xmm4, xmm4
-%if ABI_IS_32BIT
- movdqa [rsp + qcoeff], xmm4
- movdqa [rsp + qcoeff + 16], xmm4
-%else
- pxor xmm8, xmm8
-%endif
-
- ; quant_shift
- movdqa xmm5, [rax]
-
- ; zrun_zbin_boost
- mov rax, rdx
-
-%macro ZIGZAG_LOOP 5
- ; x
- pextrw ecx, %4, %2
-
- ; if (x >= zbin)
- sub cx, WORD PTR[rdx] ; x - zbin
- lea rdx, [rdx + 2] ; zbin_boost_ptr++
- jl .rq_zigzag_loop_%1 ; x < zbin
-
- pextrw edi, %3, %2 ; y
-
- ; downshift by quant_shift[rc]
- pextrb ecx, xmm5, %1 ; quant_shift[rc]
- sar edi, cl ; also sets Z bit
- je .rq_zigzag_loop_%1 ; !y
-%if ABI_IS_32BIT
- mov WORD PTR[rsp + qcoeff + %1 *2], di
-%else
- pinsrw %5, edi, %2 ; qcoeff[rc]
-%endif
- mov rdx, rax ; reset to b->zrun_zbin_boost
-.rq_zigzag_loop_%1:
-%endmacro
-; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c
-ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4
-ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8
-ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8
-
- mov rcx, [rsi + vp9_blockd_dequant]
- mov rdi, [rsi + vp9_blockd_dqcoeff]
-
-%if ABI_IS_32BIT
- movdqa xmm4, [rsp + qcoeff]
- movdqa xmm5, [rsp + qcoeff + 16]
-%else
- %define xmm5 xmm8
-%endif
-
- ; y ^ sz
- pxor xmm4, xmm0
- pxor xmm5, xmm1
- ; x = (y ^ sz) - sz
- psubw xmm4, xmm0
- psubw xmm5, xmm1
-
- ; dequant
- movdqa xmm0, [rcx]
- movdqa xmm1, [rcx + 16]
-
- mov rcx, [rsi + vp9_blockd_qcoeff]
-
- pmullw xmm0, xmm4
- pmullw xmm1, xmm5
-
- ; store qcoeff
- movdqa [rcx], xmm4
- movdqa [rcx + 16], xmm5
-
- ; store dqcoeff
- movdqa [rdi], xmm0
- movdqa [rdi + 16], xmm1
-
- ; select the last value (in zig_zag order) for EOB
- pxor xmm6, xmm6
- pcmpeqw xmm4, xmm6
- pcmpeqw xmm5, xmm6
-
- packsswb xmm4, xmm5
- pshufb xmm4, [GLOBAL(zig_zag1d)]
- pmovmskb edx, xmm4
- xor rdi, rdi
- mov eax, -1
- xor dx, ax
- bsr eax, edx
- sub edi, edx
- sar edi, 31
- add eax, 1
- and eax, edi
-
- mov [rsi + vp9_blockd_eob], eax
-
- ; begin epilog
-%if ABI_IS_32BIT
- add rsp, stack_size
- pop rsp
-
- pop rsi
- pop rdi
- RESTORE_GOT
- pop rbp
-%else
- %undef xmm5
- %if LIBVPX_YASM_WIN64
- pop rsi
- pop rdi
- RESTORE_XMM
- %endif
-%endif
-
- ret
-
-SECTION_RODATA
-align 16
-; vp9/common/vp9_entropy.c: vp9_default_zig_zag1d
-zig_zag1d:
- db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
deleted file mode 100644
index 1fa052147..000000000
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ /dev/null
@@ -1,137 +0,0 @@
-;
-; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-
-; void vp9_fast_quantize_b_ssse3 | arg
-; (BLOCK *b, | 0
-; BLOCKD *d) | 1
-;
-
-global sym(vp9_fast_quantize_b_ssse3) PRIVATE
-sym(vp9_fast_quantize_b_ssse3):
- push rbp
- mov rbp, rsp
- GET_GOT rbx
-
-%if ABI_IS_32BIT
- push rdi
- push rsi
-%else
- %if LIBVPX_YASM_WIN64
- push rdi
- push rsi
- %endif
-%endif
- ; end prolog
-
-%if ABI_IS_32BIT
- mov rdi, arg(0) ; BLOCK *b
- mov rsi, arg(1) ; BLOCKD *d
-%else
- %if LIBVPX_YASM_WIN64
- mov rdi, rcx ; BLOCK *b
- mov rsi, rdx ; BLOCKD *d
- %else
- ;mov rdi, rdi ; BLOCK *b
- ;mov rsi, rsi ; BLOCKD *d
- %endif
-%endif
-
- mov rax, [rdi + vp9_block_coeff]
- mov rcx, [rdi + vp9_block_round]
- mov rdx, [rdi + vp9_block_quant_fast]
-
- ; coeff
- movdqa xmm0, [rax]
- movdqa xmm4, [rax + 16]
-
- ; round
- movdqa xmm2, [rcx]
- movdqa xmm3, [rcx + 16]
-
- movdqa xmm1, xmm0
- movdqa xmm5, xmm4
-
- ; sz = z >> 15
- psraw xmm0, 15
- psraw xmm4, 15
-
- pabsw xmm1, xmm1
- pabsw xmm5, xmm5
-
- paddw xmm1, xmm2
- paddw xmm5, xmm3
-
- ; quant_fast
- pmulhw xmm1, [rdx]
- pmulhw xmm5, [rdx + 16]
-
- mov rax, [rsi + vp9_blockd_qcoeff]
- mov rdi, [rsi + vp9_blockd_dequant]
- mov rcx, [rsi + vp9_blockd_dqcoeff]
-
- pxor xmm1, xmm0
- pxor xmm5, xmm4
- psubw xmm1, xmm0
- psubw xmm5, xmm4
-
- movdqa [rax], xmm1
- movdqa [rax + 16], xmm5
-
- movdqa xmm2, [rdi]
- movdqa xmm3, [rdi + 16]
-
- pxor xmm4, xmm4
- pmullw xmm2, xmm1
- pmullw xmm3, xmm5
-
- pcmpeqw xmm1, xmm4 ;non zero mask
- pcmpeqw xmm5, xmm4 ;non zero mask
- packsswb xmm1, xmm5
- pshufb xmm1, [GLOBAL(zz_shuf)]
-
- pmovmskb edx, xmm1
-
- xor rdi, rdi
- mov eax, -1
- xor dx, ax ;flip the bits for bsr
- bsr eax, edx
-
- movdqa [rcx], xmm2 ;store dqcoeff
- movdqa [rcx + 16], xmm3 ;store dqcoeff
-
- sub edi, edx ;check for all zeros in bit mask
- sar edi, 31 ;0 or -1
- add eax, 1
- and eax, edi ;if the bit mask was all zero,
- ;then eob = 0
- mov [rsi + vp9_blockd_eob], eax
-
- ; begin epilog
-%if ABI_IS_32BIT
- pop rsi
- pop rdi
-%else
- %if LIBVPX_YASM_WIN64
- pop rsi
- pop rdi
- %endif
-%endif
-
- RESTORE_GOT
- pop rbp
- ret
-
-SECTION_RODATA
-align 16
-zz_shuf:
- db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp9/encoder/x86/vp9_quantize_x86.h b/vp9/encoder/x86/vp9_quantize_x86.h
deleted file mode 100644
index d1db17394..000000000
--- a/vp9/encoder/x86/vp9_quantize_x86.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license and patent
- * grant that can be found in the LICENSE file in the root of the source
- * tree. All contributing project authors may be found in the AUTHORS
- * file in the root of the source tree.
- */
-
-#ifndef VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_
-#define VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_
-
-
-/* Note:
- *
- * This platform is commonly built for runtime CPU detection. If you modify
- * any of the function mappings present in this file, be sure to also update
- * them in the function pointer initialization code
- */
-#if HAVE_MMX
-
-#endif /* HAVE_MMX */
-
-
-#if HAVE_SSE2
-extern prototype_quantize_block(vp9_regular_quantize_b_sse2);
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp9_quantize_quantb
-#define vp9_quantize_quantb vp9_regular_quantize_b_sse2
-#endif /* !CONFIG_RUNTIME_CPU_DETECT */
-
-#endif /* HAVE_SSE2 */
-
-
-#if HAVE_SSE4_1
-extern prototype_quantize_block(vp9_regular_quantize_b_sse4);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp9_quantize_quantb
-#define vp9_quantize_quantb vp9_regular_quantize_b_sse4
-
-#endif /* !CONFIG_RUNTIME_CPU_DETECT */
-
-#endif /* HAVE_SSE4_1 */
-
-#endif /* QUANTIZE_X86_H */