path: root/vp8/encoder
Diffstat (limited to 'vp8/encoder')
-rw-r--r--  vp8/encoder/bitstream.c                  111
-rw-r--r--  vp8/encoder/block.h                       12
-rw-r--r--  vp8/encoder/encodeframe.c                733
-rw-r--r--  vp8/encoder/encodemb.c                    31
-rw-r--r--  vp8/encoder/generic/csystemdependent.c    30
-rw-r--r--  vp8/encoder/mcomp.c                        2
-rw-r--r--  vp8/encoder/onyx_if.c                     55
-rw-r--r--  vp8/encoder/onyx_int.h                    13
-rw-r--r--  vp8/encoder/rdopt.c                     1068
-rw-r--r--  vp8/encoder/rdopt.h                        3
-rw-r--r--  vp8/encoder/sad_c.c                       79
-rw-r--r--  vp8/encoder/segmentation.c                79
-rw-r--r--  vp8/encoder/variance.h                    78
-rw-r--r--  vp8/encoder/variance_c.c                  85
14 files changed, 2158 insertions, 221 deletions
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 76aed7e2d..90bc8e987 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -288,6 +288,12 @@ static void kfwrite_ymode(vp8_writer *bc, int m, const vp8_prob *p) {
vp8_write_token(bc, vp8_kf_ymode_tree, p, vp8_kf_ymode_encodings + m);
}
+#if CONFIG_SUPERBLOCKS
+static void sb_kfwrite_ymode(vp8_writer *bc, int m, const vp8_prob *p) {
+ vp8_write_token(bc, vp8_uv_mode_tree, p, vp8_sb_kf_ymode_encodings + m);
+}
+#endif
+
static void write_i8x8_mode(vp8_writer *bc, int m, const vp8_prob *p) {
vp8_write_token(bc, vp8_i8x8_mode_tree, p, vp8_i8x8_mode_encodings + m);
}
@@ -533,6 +539,16 @@ static void write_mv_ref
vp8_mv_ref_encoding_array - NEARESTMV + m);
}
+#if CONFIG_SUPERBLOCKS
+static void write_sb_mv_ref(vp8_writer *w, MB_PREDICTION_MODE m, const vp8_prob *p) {
+#if CONFIG_DEBUG
+ assert(NEARESTMV <= m && m < SPLITMV);
+#endif
+ vp8_write_token(w, vp8_sb_mv_ref_tree, p,
+ vp8_sb_mv_ref_encoding_array - NEARESTMV + m);
+}
+#endif
+
static void write_sub_mv_ref
(
vp8_writer *w, B_PREDICTION_MODE m, const vp8_prob *p
@@ -810,6 +826,9 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) {
// Process the 4 MBs in the order:
// top-left, top-right, bottom-left, bottom-right
+#if CONFIG_SUPERBLOCKS
+ vp8_write(w, m->mbmi.encoded_as_sb, pc->sb_coded);
+#endif
for (i = 0; i < 4; i++) {
MB_MODE_INFO *mi;
MV_REFERENCE_FRAME rf;
@@ -872,7 +891,15 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) {
if (pc->mb_no_coeff_skip &&
(!segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
(get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) {
- vp8_encode_bool(w, mi->mb_skip_coeff,
+ int skip_coeff = mi->mb_skip_coeff;
+#if CONFIG_SUPERBLOCKS
+ if (mi->encoded_as_sb) {
+ skip_coeff &= m[1].mbmi.mb_skip_coeff;
+ skip_coeff &= m[mis].mbmi.mb_skip_coeff;
+ skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff;
+ }
+#endif
+ vp8_encode_bool(w, skip_coeff,
get_pred_prob(pc, xd, PRED_MBSKIP));
}
@@ -884,6 +911,8 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) {
active_section = 6;
#endif
+ // TODO(rbultje) write using SB tree structure
+
if (!segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
write_ymode(w, mode, pc->fc.ymode_prob);
}
@@ -949,7 +978,14 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) {
// Is the segment coding of mode enabled
if (!segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
- write_mv_ref(w, mode, mv_ref_p);
+#if CONFIG_SUPERBLOCKS
+ if (mi->encoded_as_sb) {
+ write_sb_mv_ref(w, mode, mv_ref_p);
+ } else
+#endif
+ {
+ write_mv_ref(w, mode, mv_ref_p);
+ }
vp8_accum_mv_refs(&cpi->common, mode, ct);
}
@@ -1085,6 +1121,17 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) {
}
}
+#if CONFIG_SUPERBLOCKS
+ if (m->mbmi.encoded_as_sb) {
+ assert(!i);
+ mb_col += 2;
+ m += 2;
+ cpi->mb.partition_info += 2;
+ prev_m += 2;
+ break;
+ }
+#endif
+
// Next MB
mb_row += dy;
mb_col += dx;
@@ -1151,6 +1198,9 @@ static void write_kfmodes(VP8_COMP *cpi) {
mb_col = 0;
for (col = 0; col < c->mb_cols; col += 2) {
+#if CONFIG_SUPERBLOCKS
+ vp8_write(bc, m->mbmi.encoded_as_sb, c->sb_coded);
+#endif
// Process the 4 MBs in the order:
// top-left, top-right, bottom-left, bottom-right
for (i = 0; i < 4; i++) {
@@ -1181,11 +1231,27 @@ static void write_kfmodes(VP8_COMP *cpi) {
if (c->mb_no_coeff_skip &&
(!segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
(get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) {
- vp8_encode_bool(bc, m->mbmi.mb_skip_coeff,
+ int skip_coeff = m->mbmi.mb_skip_coeff;
+#if CONFIG_SUPERBLOCKS
+ if (m->mbmi.encoded_as_sb) {
+ skip_coeff &= m[1].mbmi.mb_skip_coeff;
+ skip_coeff &= m[mis].mbmi.mb_skip_coeff;
+ skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff;
+ }
+#endif
+ vp8_encode_bool(bc, skip_coeff,
get_pred_prob(c, xd, PRED_MBSKIP));
}
- kfwrite_ymode(bc, ym,
- c->kf_ymode_prob[c->kf_ymode_probs_index]);
+#if CONFIG_SUPERBLOCKS
+ if (m->mbmi.encoded_as_sb) {
+ sb_kfwrite_ymode(bc, ym,
+ c->sb_kf_ymode_prob[c->kf_ymode_probs_index]);
+ } else
+#endif
+ {
+ kfwrite_ymode(bc, ym,
+ c->kf_ymode_prob[c->kf_ymode_probs_index]);
+ }
if (ym == B_PRED) {
const int mis = c->mode_info_stride;
@@ -1233,6 +1299,14 @@ static void write_kfmodes(VP8_COMP *cpi) {
} else
write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]);
+#if CONFIG_SUPERBLOCKS
+ if (m->mbmi.encoded_as_sb) {
+ assert(!i);
+ mb_col += 2;
+ m += 2;
+ break;
+ }
+#endif
// Next MB
mb_row += dy;
mb_col += dx;
@@ -1793,7 +1867,7 @@ static void put_delta_q(vp8_writer *bc, int delta_q) {
} else
vp8_write_bit(bc, 0);
}
-extern const unsigned int kf_y_mode_cts[8][VP8_YMODES];
+
static void decide_kf_ymode_entropy(VP8_COMP *cpi) {
int mode_cost[MB_MODE_COUNT];
@@ -1808,6 +1882,13 @@ static void decide_kf_ymode_entropy(VP8_COMP *cpi) {
for (j = 0; j < VP8_YMODES; j++) {
cost += mode_cost[j] * cpi->ymode_count[j];
}
+#if CONFIG_SUPERBLOCKS
+ vp8_cost_tokens(mode_cost, cpi->common.sb_kf_ymode_prob[i],
+ vp8_sb_ymode_tree);
+ for (j = 0; j < VP8_I32X32_MODES; j++) {
+ cost += mode_cost[j] * cpi->sb_ymode_count[j];
+ }
+#endif
if (cost < bestcost) {
bestindex = i;
bestcost = cost;
@@ -1906,11 +1987,6 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
// Select the coding strategy (temporal or spatial)
choose_segmap_coding_method(cpi);
- // Take a copy of the segment map if it changed for
- // future comparison
- vpx_memcpy(pc->last_frame_seg_map,
- cpi->segmentation_map, pc->MBs);
-
// Write out the chosen coding method.
vp8_write_bit(bc, (pc->temporal_update) ? 1 : 0);
}
@@ -2048,6 +2124,19 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
}
}
+#if CONFIG_SUPERBLOCKS
+ {
+ /* sb mode probability */
+ int sb_coded = 256 - (cpi->sb_count << 8) / (((pc->mb_rows + 1) >> 1) * ((pc->mb_cols + 1) >> 1));
+ if (sb_coded <= 0)
+ sb_coded = 1;
+ else if (sb_coded >= 256)
+ sb_coded = 255;
+ pc->sb_coded = sb_coded;
+ vp8_write_literal(bc, pc->sb_coded, 8);
+ }
+#endif
+
vp8_write_bit(bc, pc->txfm_mode);
// Encode the loop filter level and type
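
A minimal standalone sketch of the sb_coded probability derivation performed in the hunk above, using the same Q8 convention (the helper name and its isolation from VP8_COMMON are illustrative, not part of the patch):

    static int compute_sb_coded_prob(int sb_count, int mb_rows, int mb_cols) {
      /* Number of 32x32 superblock positions: ceil(mb_rows/2) * ceil(mb_cols/2). */
      int sb_total = ((mb_rows + 1) >> 1) * ((mb_cols + 1) >> 1);
      /* Probability (out of 256) of the 0 branch, i.e. that an SB position is
       * coded as four independent MBs rather than one 32x32 superblock. */
      int sb_coded = 256 - (sb_count << 8) / sb_total;
      /* Clamp to the valid bool-coder probability range [1, 255]. */
      if (sb_coded <= 0)
        sb_coded = 1;
      else if (sb_coded >= 256)
        sb_coded = 255;
      return sb_coded;
    }
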
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index dfc1d743e..d73af4faa 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -82,7 +82,9 @@ typedef struct {
int best_mode_index;
int rddiv;
int rdmult;
-
+ int hybrid_pred_diff;
+ int comp_pred_diff;
+ int single_pred_diff;
} PICK_MODE_CONTEXT;
typedef struct {
@@ -139,12 +141,6 @@ typedef struct {
int mv_col_max;
int mv_row_min;
int mv_row_max;
-#if CONFIG_SUPERBLOCKS
- int mv_col_min_sb;
- int mv_col_max_sb;
- int mv_row_min_sb;
- int mv_row_max_sb;
-#endif
int skip;
@@ -163,8 +159,6 @@ typedef struct {
int optimize;
int q_index;
- int encode_as_sb;
-
// Structure to hold context for each of the 4 MBs within a SB:
// when encoded as 4 independent MBs:
PICK_MODE_CONTEXT mb_context[4];
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index e58c852a7..4472497e0 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -57,16 +57,24 @@ extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
MB_ROW_COMP *mbr_ei,
int mb_row,
int count);
-extern int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
+int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x,
+ int recon_yoffset, int recon_uvoffset,
+ int *returnrate, int *returndistortion);
+extern void vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
int recon_yoffset,
- int recon_uvoffset);
+ int recon_uvoffset, int *r, int *d);
void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
void vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
int recon_yoffset, int recon_uvoffset,
int output_enabled);
+void vp8cx_encode_inter_superblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
+ int recon_yoffset, int recon_uvoffset, int mb_col, int mb_row);
void vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x,
TOKENEXTRA **t, int output_enabled);
+void vp8cx_encode_intra_super_block(VP8_COMP *cpi,
+ MACROBLOCK *x,
+ TOKENEXTRA **t, int mb_col);
static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x);
@@ -378,6 +386,13 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
// Restore the coding context of the MB to that that was in place
// when the mode was picked for it
vpx_memcpy(xd->mode_info_context, mi, sizeof(MODE_INFO));
+#if CONFIG_SUPERBLOCKS
+ if (mi->mbmi.encoded_as_sb) {
+ vpx_memcpy(xd->mode_info_context + 1, mi, sizeof(MODE_INFO));
+ vpx_memcpy(xd->mode_info_context + cpi->common.mode_info_stride, mi, sizeof(MODE_INFO));
+ vpx_memcpy(xd->mode_info_context + cpi->common.mode_info_stride + 1, mi, sizeof(MODE_INFO));
+ }
+#endif
if (mb_mode == B_PRED) {
for (i = 0; i < 16; i++) {
@@ -448,6 +463,10 @@ static void update_state(VP8_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
cpi->prediction_error += ctx->distortion;
cpi->intra_error += ctx->intra_error;
+
+ cpi->rd_comp_pred_diff[0] += ctx->single_pred_diff;
+ cpi->rd_comp_pred_diff[1] += ctx->comp_pred_diff;
+ cpi->rd_comp_pred_diff[2] += ctx->hybrid_pred_diff;
}
}
@@ -458,7 +477,8 @@ static void pick_mb_modes(VP8_COMP *cpi,
MACROBLOCK *x,
MACROBLOCKD *xd,
TOKENEXTRA **tp,
- int *totalrate) {
+ int *totalrate,
+ int *totaldist) {
int i;
int map_index;
int recon_yoffset, recon_uvoffset;
@@ -477,7 +497,7 @@ static void pick_mb_modes(VP8_COMP *cpi,
/* Function should not modify L & A contexts; save and restore on exit */
vpx_memcpy(left_context,
- cpi->left_context,
+ cm->left_context,
sizeof(left_context));
vpx_memcpy(above_context,
initial_above_context_ptr,
@@ -525,9 +545,7 @@ static void pick_mb_modes(VP8_COMP *cpi,
// Restore the appropriate left context depending on which
// row in the SB the MB is situated
- vpx_memcpy(&cm->left_context,
- &cpi->left_context[i >> 1],
- sizeof(ENTROPY_CONTEXT_PLANES));
+ xd->left_context = cm->left_context + (i >> 1);
// Set up distance of MB to edge of frame in 1/8th pel units
xd->mb_to_top_edge = -((mb_row * 16) << 3);
@@ -568,9 +586,11 @@ static void pick_mb_modes(VP8_COMP *cpi,
// Is segmentation enabled
if (xd->segmentation_enabled) {
// Code to set segment id in xd->mbmi.segment_id
- if (cpi->segmentation_map[map_index] <= 3)
+ if (xd->update_mb_segmentation_map)
mbmi->segment_id = cpi->segmentation_map[map_index];
else
+ mbmi->segment_id = cm->last_frame_seg_map[map_index];
+ if (mbmi->segment_id > 3)
mbmi->segment_id = 0;
vp8cx_mb_init_quantizer(cpi, x);
@@ -583,22 +603,29 @@ static void pick_mb_modes(VP8_COMP *cpi,
/* force 4x4 transform for mode selection */
mbmi->txfm_size = TX_4X4; // TODO IS this right??
+#if CONFIG_SUPERBLOCKS
+ xd->mode_info_context->mbmi.encoded_as_sb = 0;
+#endif
+
cpi->update_context = 0; // TODO Do we need this now??
// Find best coding mode & reconstruct the MB so it is available
// as a predictor for MBs that follow in the SB
if (cm->frame_type == KEY_FRAME) {
- *totalrate += vp8_rd_pick_intra_mode(cpi, x);
-
- // Save the coding context
- vpx_memcpy(&x->mb_context[i].mic, xd->mode_info_context,
- sizeof(MODE_INFO));
+ int r, d;
+ vp8_rd_pick_intra_mode(cpi, x, &r, &d);
+ *totalrate += r;
+ *totaldist += d;
// Dummy encode, do not do the tokenization
vp8cx_encode_intra_macro_block(cpi, x, tp, 0);
// Note the encoder may have changed the segment_id
+
+ // Save the coding context
+ vpx_memcpy(&x->mb_context[i].mic, xd->mode_info_context,
+ sizeof(MODE_INFO));
} else {
- int seg_id;
+ int seg_id, r, d;
if (xd->segmentation_enabled && cpi->seg0_cnt > 0 &&
!segfeature_active(xd, 0, SEG_LVL_REF_FRAME) &&
@@ -612,9 +639,10 @@ static void pick_mb_modes(VP8_COMP *cpi,
cpi->seg0_progress = (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols + i) << 16) / cm->MBs;
}
- *totalrate += vp8cx_pick_mode_inter_macroblock(cpi, x,
- recon_yoffset,
- recon_uvoffset);
+ vp8cx_pick_mode_inter_macroblock(cpi, x, recon_yoffset,
+ recon_uvoffset, &r, &d);
+ *totalrate += r;
+ *totaldist += d;
// Dummy encode, do not do the tokenization
vp8cx_encode_inter_macroblock(cpi, x, tp,
@@ -639,11 +667,6 @@ static void pick_mb_modes(VP8_COMP *cpi,
}
}
- // Keep a copy of the updated left context
- vpx_memcpy(&cpi->left_context[i >> 1],
- &cm->left_context,
- sizeof(ENTROPY_CONTEXT_PLANES));
-
// Next MB
mb_row += dy;
mb_col += dx;
@@ -664,7 +687,7 @@ static void pick_mb_modes(VP8_COMP *cpi,
}
/* Restore L & A coding context to those in place on entry */
- vpx_memcpy(cpi->left_context,
+ vpx_memcpy(cm->left_context,
left_context,
sizeof(left_context));
vpx_memcpy(initial_above_context_ptr,
@@ -672,6 +695,156 @@ static void pick_mb_modes(VP8_COMP *cpi,
sizeof(above_context));
}
+#if CONFIG_SUPERBLOCKS
+static void pick_sb_modes (VP8_COMP *cpi,
+ VP8_COMMON *cm,
+ int mb_row,
+ int mb_col,
+ MACROBLOCK *x,
+ MACROBLOCKD *xd,
+ TOKENEXTRA **tp,
+ int *totalrate,
+ int *totaldist)
+{
+ int map_index;
+ int recon_yoffset, recon_uvoffset;
+ int ref_fb_idx = cm->lst_fb_idx;
+ int dst_fb_idx = cm->new_fb_idx;
+ int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+ int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
+ ENTROPY_CONTEXT_PLANES left_context[2];
+ ENTROPY_CONTEXT_PLANES above_context[2];
+ ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context
+ + mb_col;
+
+ /* Function should not modify L & A contexts; save and restore on exit */
+ vpx_memcpy (left_context,
+ cm->left_context,
+ sizeof(left_context));
+ vpx_memcpy (above_context,
+ initial_above_context_ptr,
+ sizeof(above_context));
+
+ map_index = (mb_row * cpi->common.mb_cols) + mb_col;
+ x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
+
+ /* set above context pointer */
+ xd->above_context = cm->above_context + mb_col;
+
+ /* Restore the appropriate left context depending on which
+ * row in the SB the MB is situated */
+ xd->left_context = cm->left_context;
+
+ // Set up distance of MB to edge of frame in 1/8th pel units
+ xd->mb_to_top_edge = -((mb_row * 16) << 3);
+ xd->mb_to_left_edge = -((mb_col * 16) << 3);
+ xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+ xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
+
+ /* Set up limit values for MV components to prevent them from
+ * extending beyond the UMV borders assuming 16x16 block size */
+ x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
+ x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
+ x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
+ (VP8BORDERINPIXELS - 32 - INTERP_EXTEND));
+ x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
+ (VP8BORDERINPIXELS - 32 - INTERP_EXTEND));
+
+ xd->up_available = (mb_row != 0);
+ xd->left_available = (mb_col != 0);
+
+ recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16);
+ recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8);
+
+ xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+ xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+ xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+#if 0 // FIXME
+ /* Copy current MB to a work buffer */
+ RECON_INVOKE(&xd->rtcd->recon, copy16x16)(x->src.y_buffer,
+ x->src.y_stride,
+ x->thismb, 16);
+#endif
+ x->rddiv = cpi->RDDIV;
+ x->rdmult = cpi->RDMULT;
+ if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
+ vp8_activity_masking(cpi, x);
+ /* Is segmentation enabled */
+ if (xd->segmentation_enabled)
+ {
+ /* Code to set segment id in xd->mbmi.segment_id */
+ if (xd->update_mb_segmentation_map)
+ xd->mode_info_context->mbmi.segment_id =
+ cpi->segmentation_map[map_index] &&
+ cpi->segmentation_map[map_index + 1] &&
+ cpi->segmentation_map[map_index + cm->mb_cols] &&
+ cpi->segmentation_map[map_index + cm->mb_cols + 1];
+ else
+ xd->mode_info_context->mbmi.segment_id =
+ cm->last_frame_seg_map[map_index] &&
+ cm->last_frame_seg_map[map_index + 1] &&
+ cm->last_frame_seg_map[map_index + cm->mb_cols] &&
+ cm->last_frame_seg_map[map_index + cm->mb_cols + 1];
+ if (xd->mode_info_context->mbmi.segment_id > 3)
+ xd->mode_info_context->mbmi.segment_id = 0;
+
+ vp8cx_mb_init_quantizer(cpi, x);
+ }
+ else
+ /* Set to Segment 0 by default */
+ xd->mode_info_context->mbmi.segment_id = 0;
+
+ x->active_ptr = cpi->active_map + map_index;
+
+ cpi->update_context = 0; // TODO Do we need this now??
+
+ /* Find best coding mode & reconstruct the MB so it is available
+ * as a predictor for MBs that follow in the SB */
+ if (cm->frame_type == KEY_FRAME)
+ {
+ vp8_rd_pick_intra_mode_sb(cpi, x,
+ totalrate,
+ totaldist);
+
+ /* Save the coding context */
+ vpx_memcpy(&x->sb_context[0].mic, xd->mode_info_context,
+ sizeof(MODE_INFO));
+ }
+ else
+ {
+ if (xd->segmentation_enabled && cpi->seg0_cnt > 0 &&
+ !segfeature_active( xd, 0, SEG_LVL_REF_FRAME ) &&
+ segfeature_active( xd, 1, SEG_LVL_REF_FRAME ) &&
+ check_segref(xd, 1, INTRA_FRAME) +
+ check_segref(xd, 1, LAST_FRAME) +
+ check_segref(xd, 1, GOLDEN_FRAME) +
+ check_segref(xd, 1, ALTREF_FRAME) == 1)
+ {
+ cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
+ }
+ else
+ {
+ cpi->seg0_progress =
+ (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols) << 16) / cm->MBs;
+ }
+
+ vp8_rd_pick_inter_mode_sb(cpi, x,
+ recon_yoffset,
+ recon_uvoffset,
+ totalrate,
+ totaldist);
+ }
+
+ /* Restore L & A coding context to those in place on entry */
+ vpx_memcpy (cm->left_context,
+ left_context,
+ sizeof(left_context));
+ vpx_memcpy (initial_above_context_ptr,
+ above_context,
+ sizeof(above_context));
+}
+#endif
+
static void encode_sb(VP8_COMP *cpi,
VP8_COMMON *cm,
int mbrow,
@@ -679,6 +852,7 @@ static void encode_sb(VP8_COMP *cpi,
MACROBLOCK *x,
MACROBLOCKD *xd,
TOKENEXTRA **tp) {
+ VP8_COMMON *pc = cm;
int i;
int map_index;
int mb_row, mb_col;
@@ -733,22 +907,19 @@ static void encode_sb(VP8_COMP *cpi,
// Restore MB state to that when it was picked
#if CONFIG_SUPERBLOCKS
- if (x->encode_as_sb)
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
update_state(cpi, x, &x->sb_context[i]);
- else
+ cpi->sb_count++;
+ } else
#endif
update_state(cpi, x, &x->mb_context[i]);
- // Copy in the appropriate left context
- vpx_memcpy(&cm->left_context,
- &cpi->left_context[i >> 1],
- sizeof(ENTROPY_CONTEXT_PLANES));
-
map_index = (mb_row * cpi->common.mb_cols) + mb_col;
x->mb_activity_ptr = &cpi->mb_activity_map[map_index];
// reset above block coeffs
xd->above_context = cm->above_context + mb_col;
+ xd->left_context = cm->left_context + (i >> 1);
// Set up distance of MB to edge of the frame in 1/8th pel units
xd->mb_to_top_edge = -((mb_row * 16) << 3);
@@ -756,24 +927,28 @@ static void encode_sb(VP8_COMP *cpi,
xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
- // Set up limit values for MV components to prevent them from
- // extending beyond the UMV borders assuming 16x16 block size
- x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
- x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
- x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
- (VP8BORDERINPIXELS - 16 - INTERP_EXTEND));
- x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
- (VP8BORDERINPIXELS - 16 - INTERP_EXTEND));
-
#if CONFIG_SUPERBLOCKS
- // Set up limit values for MV components to prevent them from
- // extending beyond the UMV borders assuming 32x32 block size
- x->mv_row_min_sb = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
- x->mv_col_min_sb = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
- x->mv_row_max_sb = ((cm->mb_rows - mb_row) * 16 +
- (VP8BORDERINPIXELS - 32 - INTERP_EXTEND));
- x->mv_col_max_sb = ((cm->mb_cols - mb_col) * 16 +
- (VP8BORDERINPIXELS - 32 - INTERP_EXTEND));
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ // Set up limit values for MV components to prevent them from
+ // extending beyond the UMV borders assuming 32x32 block size
+ x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
+ x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
+ x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
+ (VP8BORDERINPIXELS - 32 - INTERP_EXTEND));
+ x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
+ (VP8BORDERINPIXELS - 32 - INTERP_EXTEND));
+ } else {
+#endif
+ // Set up limit values for MV components to prevent them from
+ // extending beyond the UMV borders assuming 16x16 block size
+ x->mv_row_min = -((mb_row * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
+ x->mv_col_min = -((mb_col * 16) + VP8BORDERINPIXELS - INTERP_EXTEND);
+ x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
+ (VP8BORDERINPIXELS - 16 - INTERP_EXTEND));
+ x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
+ (VP8BORDERINPIXELS - 16 - INTERP_EXTEND));
+#if CONFIG_SUPERBLOCKS
+ }
#endif
xd->up_available = (mb_row != 0);
@@ -796,24 +971,21 @@ static void encode_sb(VP8_COMP *cpi,
// Is segmentation enabled
if (xd->segmentation_enabled) {
- // Code to set segment id in xd->mbmi.segment_id
- if (cpi->segmentation_map[map_index] <= 3)
- mbmi->segment_id = cpi->segmentation_map[map_index];
- else
- mbmi->segment_id = 0;
-
vp8cx_mb_init_quantizer(cpi, x);
- } else
- // Set to Segment 0 by default
- mbmi->segment_id = 0;
+ }
x->active_ptr = cpi->active_map + map_index;
cpi->update_context = 0;
if (cm->frame_type == KEY_FRAME) {
- vp8cx_encode_intra_macro_block(cpi, x, tp, 1);
- // Note the encoder may have changed the segment_id
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb)
+ vp8cx_encode_intra_super_block(cpi, x, tp, mb_col);
+ else
+#endif
+ vp8cx_encode_intra_macro_block(cpi, x, tp, 1);
+ // Note the encoder may have changed the segment_id
#ifdef MODE_STATS
y_modes[mbmi->mode]++;
@@ -822,9 +994,25 @@ static void encode_sb(VP8_COMP *cpi,
unsigned char *segment_id;
int seg_ref_active;
- vp8cx_encode_inter_macroblock(cpi, x, tp,
- recon_yoffset, recon_uvoffset, 1);
- // Note the encoder may have changed the segment_id
+ if (xd->mode_info_context->mbmi.ref_frame) {
+ unsigned char pred_context;
+
+ pred_context = get_pred_context(cm, xd, PRED_COMP);
+
+ if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME)
+ cpi->single_pred_count[pred_context]++;
+ else
+ cpi->comp_pred_count[pred_context]++;
+ }
+
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb)
+ vp8cx_encode_inter_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset, mb_col, mb_row);
+ else
+#endif
+ vp8cx_encode_inter_macroblock(cpi, x, tp,
+ recon_yoffset, recon_uvoffset, 1);
+ // Note the encoder may have changed the segment_id
#ifdef MODE_STATS
inter_y_modes[mbmi->mode]++;
@@ -864,10 +1052,20 @@ static void encode_sb(VP8_COMP *cpi,
// TODO Partitioning is broken!
cpi->tplist[mb_row].stop = *tp;
- // Copy back updated left context
- vpx_memcpy(&cpi->left_context[i >> 1],
- &cm->left_context,
- sizeof(ENTROPY_CONTEXT_PLANES));
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ x->src.y_buffer += 32;
+ x->src.u_buffer += 16;
+ x->src.v_buffer += 16;
+
+ x->gf_active_ptr += 2;
+ x->partition_info += 2;
+ xd->mode_info_context += 2;
+ xd->prev_mode_info_context += 2;
+
+ break;
+ }
+#endif
// Next MB
mb_row += dy;
@@ -911,14 +1109,13 @@ void encode_sb_row(VP8_COMP *cpi,
int mb_cols = cm->mb_cols;
// Initialize the left context for the new SB row
- vpx_memset(cpi->left_context, 0, sizeof(cpi->left_context));
- vpx_memset(&cm->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memset(cm->left_context, 0, sizeof(cm->left_context));
// Code each SB in the row
for (mb_col = 0; mb_col < mb_cols; mb_col += 2) {
- int mb_rate = 0;
+ int mb_rate = 0, mb_dist = 0;
#if CONFIG_SUPERBLOCKS
- int sb_rate = INT_MAX;
+ int sb_rate = INT_MAX, sb_dist;
#endif
#if CONFIG_DEBUG
@@ -930,8 +1127,14 @@ void encode_sb_row(VP8_COMP *cpi,
unsigned char *vb = x->src.v_buffer;
#endif
+#if CONFIG_SUPERBLOCKS
// Pick modes assuming the SB is coded as 4 independent MBs
- pick_mb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate);
+ xd->mode_info_context->mbmi.encoded_as_sb = 0;
+#endif
+ pick_mb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate, &mb_dist);
+#if CONFIG_SUPERBLOCKS
+ mb_rate += vp8_cost_bit(cm->sb_coded, 0);
+#endif
x->src.y_buffer -= 32;
x->src.u_buffer -= 16;
@@ -952,21 +1155,40 @@ void encode_sb_row(VP8_COMP *cpi,
#endif
#if CONFIG_SUPERBLOCKS
- // Pick a mode assuming that it applies all 4 of the MBs in the SB
- pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, &sb_rate);
+ if (!((( mb_cols & 1) && mb_col == mb_cols - 1) ||
+ ((cm->mb_rows & 1) && mb_row == cm->mb_rows - 1))) {
+ /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */
+ xd->mode_info_context->mbmi.encoded_as_sb = 1;
+ pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &sb_rate, &sb_dist);
+ sb_rate += vp8_cost_bit(cm->sb_coded, 1);
+ }
- // Decide whether to encode as a SB or 4xMBs
- if (sb_rate < mb_rate) {
- x->encode_as_sb = 1;
+ /* Decide whether to encode as a SB or 4xMBs */
+ if (sb_rate < INT_MAX &&
+ RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) <
+ RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) {
+ xd->mode_info_context->mbmi.encoded_as_sb = 1;
+ xd->mode_info_context[1].mbmi.encoded_as_sb = 1;
+ xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 1;
+ xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 1;
*totalrate += sb_rate;
} else
#endif
{
- x->encode_as_sb = 0;
+#if CONFIG_SUPERBLOCKS
+ xd->mode_info_context->mbmi.encoded_as_sb = 0;
+ if (cm->mb_cols - 1 > mb_col)
+ xd->mode_info_context[1].mbmi.encoded_as_sb = 0;
+ if (cm->mb_rows - 1 > mb_row) {
+ xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 0;
+ if (cm->mb_cols - 1 > mb_col)
+ xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 0;
+ }
+#endif
*totalrate += mb_rate;
}
- // Encode SB using best computed mode(s)
+ /* Encode SB using best computed mode(s) */
encode_sb(cpi, cm, mb_row, mb_col, x, xd, tp);
#if CONFIG_DEBUG
@@ -1038,8 +1260,6 @@ void init_encode_frame_mb_context(VP8_COMP *cpi) {
xd->mode_info_context->mbmi.mode = DC_PRED;
xd->mode_info_context->mbmi.uv_mode = DC_PRED;
- xd->left_context = &cm->left_context;
-
vp8_zero(cpi->count_mb_ref_frame_usage)
vp8_zero(cpi->bmode_count)
vp8_zero(cpi->ymode_count)
@@ -1049,6 +1269,10 @@ void init_encode_frame_mb_context(VP8_COMP *cpi) {
vp8_zero(cpi->mbsplit_count)
vp8_zero(cpi->common.fc.mv_ref_ct)
vp8_zero(cpi->common.fc.mv_ref_ct_a)
+#if CONFIG_SUPERBLOCKS
+ vp8_zero(cpi->sb_ymode_count)
+ cpi->sb_count = 0;
+#endif
// vp8_zero(cpi->uv_mode_count)
x->mvc = cm->fc.mvc;
@@ -1380,7 +1604,12 @@ static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) {
}
#endif
- ++cpi->ymode_count[m];
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ ++cpi->sb_ymode_count[m];
+ } else
+#endif
+ ++cpi->ymode_count[m];
if (m != I8X8_PRED)
++cpi->y_uv_mode_count[m][uvm];
else {
@@ -1418,6 +1647,160 @@ static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x) {
#endif
}
+#if CONFIG_SUPERBLOCKS
+static void update_sb_skip_coeff_state(VP8_COMP *cpi,
+ MACROBLOCK *x,
+ ENTROPY_CONTEXT_PLANES ta[4],
+ ENTROPY_CONTEXT_PLANES tl[4],
+ TOKENEXTRA *t[4],
+ TOKENEXTRA **tp,
+ int skip[4])
+{
+ TOKENEXTRA tokens[4][16 * 24];
+ int n_tokens[4], n;
+
+ // if there were no skips, we don't need to do anything
+ if (!skip[0] && !skip[1] && !skip[2] && !skip[3])
+ return;
+
+ // if we don't do coeff skipping for this frame, we don't
+ // need to do anything here
+ if (!cpi->common.mb_no_coeff_skip)
+ return;
+
+ // if all 4 MBs skipped coeff coding, nothing to be done
+ if (skip[0] && skip[1] && skip[2] && skip[3])
+ return;
+
+ // so the situation now is that we want to skip coeffs
+ // for some MBs, but not all, and we didn't code EOB
+ // coefficients for them. However, the skip flag for this
+ // SB will be 0 overall, so we need to insert EOBs in the
+ // middle of the token tree. Do so here.
+ n_tokens[0] = t[1] - t[0];
+ n_tokens[1] = t[2] - t[1];
+ n_tokens[2] = t[3] - t[2];
+ n_tokens[3] = *tp - t[3];
+ if (n_tokens[0])
+ memcpy(tokens[0], t[0], n_tokens[0] * sizeof(*t[0]));
+ if (n_tokens[1])
+ memcpy(tokens[1], t[1], n_tokens[1] * sizeof(*t[0]));
+ if (n_tokens[2])
+ memcpy(tokens[2], t[2], n_tokens[2] * sizeof(*t[0]));
+ if (n_tokens[3])
+ memcpy(tokens[3], t[3], n_tokens[3] * sizeof(*t[0]));
+
+ // reset pointer, stuff EOBs where necessary
+ *tp = t[0];
+ for (n = 0; n < 4; n++) {
+ TOKENEXTRA *tbak = *tp;
+ if (skip[n]) {
+ x->e_mbd.above_context = &ta[n];
+ x->e_mbd.left_context = &tl[n];
+ vp8_stuff_mb_8x8(cpi, &x->e_mbd, tp, 0);
+ } else {
+ if (n_tokens[n]) {
+ memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
+ }
+ (*tp) += n_tokens[n];
+ }
+ }
+}
+
+void vp8cx_encode_intra_super_block(VP8_COMP *cpi,
+ MACROBLOCK *x,
+ TOKENEXTRA **t,
+ int mb_col) {
+ const int output_enabled = 1;
+ int n;
+ MACROBLOCKD *xd = &x->e_mbd;
+ VP8_COMMON *cm = &cpi->common;
+ const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
+ const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
+ const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
+ int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+ int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+ const VP8_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd);
+ TOKENEXTRA *tp[4];
+ int skip[4];
+ MODE_INFO *mi = x->e_mbd.mode_info_context;
+ ENTROPY_CONTEXT_PLANES ta[4], tl[4];
+
+ if ((cpi->oxcf.tuning == VP8_TUNE_SSIM) && output_enabled) {
+ adjust_act_zbin(cpi, x);
+ vp8_update_zbin_extra(cpi, x);
+ }
+
+ /* test code: set transform size based on mode selection */
+ if (cpi->common.txfm_mode == ALLOW_8X8) {
+ x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
+ x->e_mbd.mode_info_context[1].mbmi.txfm_size = TX_8X8;
+ x->e_mbd.mode_info_context[cm->mode_info_stride].mbmi.txfm_size = TX_8X8;
+ x->e_mbd.mode_info_context[cm->mode_info_stride+1].mbmi.txfm_size = TX_8X8;
+ cpi->t8x8_count++;
+ } else {
+ x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
+ cpi->t4x4_count++;
+ }
+
+ RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_sby_s)(&x->e_mbd);
+ RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_sbuv_s)(&x->e_mbd);
+
+ assert(x->e_mbd.mode_info_context->mbmi.txfm_size == TX_8X8);
+ for (n = 0; n < 4; n++)
+ {
+ int x_idx = n & 1, y_idx = n >> 1;
+
+ xd->above_context = cm->above_context + mb_col + (n & 1);
+ xd->left_context = cm->left_context + (n >> 1);
+
+ vp8_subtract_mby_s_c(x->src_diff,
+ src + x_idx * 16 + y_idx * 16 * src_y_stride,
+ src_y_stride,
+ dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
+ dst_y_stride);
+ vp8_subtract_mbuv_s_c(x->src_diff,
+ usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ src_uv_stride,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ dst_uv_stride);
+ vp8_transform_intra_mby_8x8(x);
+ vp8_transform_mbuv_8x8(x);
+ vp8_quantize_mby_8x8(x);
+ vp8_quantize_mbuv_8x8(x);
+ if (x->optimize) {
+ vp8_optimize_mby_8x8(x, rtcd);
+ vp8_optimize_mbuv_8x8(x, rtcd);
+ }
+ vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+ vp8_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+ vp8_recon_mby_s_c(IF_RTCD(&rtcd->common->recon), &x->e_mbd,
+ dst + x_idx * 16 + y_idx * 16 * dst_y_stride);
+ vp8_recon_mbuv_s_c(IF_RTCD(&rtcd->common->recon), &x->e_mbd,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride);
+
+ if (output_enabled) {
+ memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
+ memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
+ tp[n] = *t;
+ xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride;
+ vp8_tokenize_mb(cpi, &x->e_mbd, t, 0);
+ skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
+ }
+ }
+
+ if (output_enabled) {
+ // Tokenize
+ xd->mode_info_context = mi;
+ sum_intra_stats(cpi, x);
+ update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip);
+ }
+}
+#endif
+
void vp8cx_encode_intra_macro_block(VP8_COMP *cpi,
MACROBLOCK *x,
TOKENEXTRA **t,
@@ -1484,6 +1867,9 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x,
unsigned char ref_pred_flag;
x->skip = 0;
+#if CONFIG_SUPERBLOCKS
+ assert(!xd->mode_info_context->mbmi.encoded_as_sb);
+#endif
#if CONFIG_SWITCHABLE_INTERP
vp8_setup_interp_filters(xd, mbmi->interp_filter, cm);
@@ -1648,3 +2034,190 @@ void vp8cx_encode_inter_macroblock (VP8_COMP *cpi, MACROBLOCK *x,
}
}
}
+
+#if CONFIG_SUPERBLOCKS
+void vp8cx_encode_inter_superblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
+ int recon_yoffset, int recon_uvoffset, int mb_col, int mb_row) {
+ const int output_enabled = 1;
+ VP8_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
+ const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
+ const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
+ int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+ int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+ const VP8_ENCODER_RTCD *rtcd = IF_RTCD(&cpi->rtcd);
+ int mis = xd->mode_info_stride;
+ unsigned int segment_id = xd->mode_info_context->mbmi.segment_id;
+ int seg_ref_active;
+ unsigned char ref_pred_flag;
+ int n;
+ TOKENEXTRA *tp[4];
+ int skip[4];
+ MODE_INFO *mi = x->e_mbd.mode_info_context;
+ ENTROPY_CONTEXT_PLANES ta[4], tl[4];
+
+ x->skip = 0;
+
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
+ // Adjust the zbin based on this MB rate.
+ adjust_act_zbin(cpi, x);
+ }
+
+ {
+ // Experimental code. Special case for gf and arf zeromv modes.
+ // Increase zbin size to suppress noise
+ cpi->zbin_mode_boost = 0;
+ if (cpi->zbin_mode_boost_enabled) {
+ if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
+ if (xd->mode_info_context->mbmi.mode == ZEROMV) {
+ if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
+ cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+ else
+ cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+ } else if (xd->mode_info_context->mbmi.mode == SPLITMV)
+ cpi->zbin_mode_boost = 0;
+ else
+ cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+ }
+ }
+
+ vp8_update_zbin_extra(cpi, x);
+ }
+
+ seg_ref_active = segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME);
+
+ // SET VARIOUS PREDICTION FLAGS
+
+ // Did the chosen reference frame match its predicted value.
+ ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame ==
+ get_pred_ref(cm, xd)));
+ set_pred_flag(xd, PRED_REF, ref_pred_flag);
+
+ /* test code: set transform size based on mode selection */
+ if (cpi->common.txfm_mode == ALLOW_8X8
+ && x->e_mbd.mode_info_context->mbmi.mode != I8X8_PRED
+ && x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+ && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) {
+ x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
+ cpi->t8x8_count++;
+ } else {
+ x->e_mbd.mode_info_context->mbmi.txfm_size = TX_4X4;
+ cpi->t4x4_count++;
+ }
+
+ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+ RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_sby_s)(&x->e_mbd);
+ RECON_INVOKE(&rtcd->common->recon, build_intra_predictors_sbuv_s)(&x->e_mbd);
+ } else {
+ int ref_fb_idx;
+
+ if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
+ ref_fb_idx = cpi->common.lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
+ ref_fb_idx = cpi->common.gld_fb_idx;
+ else
+ ref_fb_idx = cpi->common.alt_fb_idx;
+
+ xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+ xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+ xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+ if (xd->mode_info_context->mbmi.second_ref_frame) {
+ int second_ref_fb_idx;
+
+ if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
+ second_ref_fb_idx = cpi->common.lst_fb_idx;
+ else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
+ second_ref_fb_idx = cpi->common.gld_fb_idx;
+ else
+ second_ref_fb_idx = cpi->common.alt_fb_idx;
+
+ xd->second_pre.y_buffer = cpi->common.yv12_fb[second_ref_fb_idx].y_buffer +
+ recon_yoffset;
+ xd->second_pre.u_buffer = cpi->common.yv12_fb[second_ref_fb_idx].u_buffer +
+ recon_uvoffset;
+ xd->second_pre.v_buffer = cpi->common.yv12_fb[second_ref_fb_idx].v_buffer +
+ recon_uvoffset;
+ }
+
+ vp8_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
+ xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.y_stride, xd->dst.uv_stride);
+ }
+
+ assert(x->e_mbd.mode_info_context->mbmi.txfm_size == TX_8X8);
+ for (n = 0; n < 4; n++)
+ {
+ int x_idx = n & 1, y_idx = n >> 1;
+
+ vp8_subtract_mby_s_c(x->src_diff,
+ src + x_idx * 16 + y_idx * 16 * src_y_stride,
+ src_y_stride,
+ dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
+ dst_y_stride);
+ vp8_subtract_mbuv_s_c(x->src_diff,
+ usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ src_uv_stride,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ dst_uv_stride);
+ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+ vp8_transform_intra_mby_8x8(x);
+ } else {
+ vp8_transform_mby_8x8(x);
+ }
+ vp8_transform_mbuv_8x8(x);
+ vp8_quantize_mby_8x8(x);
+ vp8_quantize_mbuv_8x8(x);
+ if (x->optimize) {
+ vp8_optimize_mby_8x8(x, rtcd);
+ vp8_optimize_mbuv_8x8(x, rtcd);
+ }
+ vp8_inverse_transform_mby_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+ vp8_inverse_transform_mbuv_8x8(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
+ vp8_recon_mby_s_c(IF_RTCD(&rtcd->common->recon), &x->e_mbd,
+ dst + x_idx * 16 + y_idx * 16 * dst_y_stride);
+ vp8_recon_mbuv_s_c(IF_RTCD(&rtcd->common->recon), &x->e_mbd,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride);
+
+ if (!x->skip) {
+ if (output_enabled) {
+ xd->left_context = cm->left_context + (n >> 1);
+ xd->above_context = cm->above_context + mb_col + (n >> 1);
+ memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
+ memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
+ tp[n] = *t;
+ xd->mode_info_context = mi + x_idx + y_idx * cm->mode_info_stride;
+ vp8_tokenize_mb(cpi, &x->e_mbd, t, 0);
+ skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff;
+ }
+ } else {
+ int mb_skip_context =
+ cpi->common.mb_no_coeff_skip ?
+ (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff +
+ (x->e_mbd.mode_info_context - cpi->common.mode_info_stride)->mbmi.mb_skip_coeff :
+ 0;
+ if (cpi->common.mb_no_coeff_skip) {
+ skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff = 1;
+ xd->left_context = cm->left_context + (n >> 1);
+ xd->above_context = cm->above_context + mb_col + (n >> 1);
+ memcpy(&ta[n], xd->above_context, sizeof(ta[n]));
+ memcpy(&tl[n], xd->left_context, sizeof(tl[n]));
+ tp[n] = *t;
+ cpi->skip_true_count[mb_skip_context]++;
+ vp8_fix_contexts(xd);
+ } else {
+ vp8_stuff_mb(cpi, xd, t, 0);
+ xd->mode_info_context->mbmi.mb_skip_coeff = 0;
+ cpi->skip_false_count[mb_skip_context]++;
+ }
+ }
+ }
+
+ xd->mode_info_context = mi;
+ update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip);
+}
+#endif
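
Conceptually, the skip handling added above works as follows: a superblock signals "skip" only when all four of its MBs skipped coefficients; when only some of them did, their (empty) token runs must be replaced by explicit end-of-block tokens so the decoder still sees per-MB coefficient data. A sketch of that rule, with a hypothetical helper standing in for the vp8_stuff_mb_8x8() call made by update_sb_skip_coeff_state():

    int sb_skip = skip[0] && skip[1] && skip[2] && skip[3];
    if (cpi->common.mb_no_coeff_skip && !sb_skip) {
      int n;
      for (n = 0; n < 4; n++) {
        if (skip[n]) {
          /* This MB produced no tokens, but the SB-level skip flag will be 0,
           * so stuff an explicit EOB run for it in the token stream. */
          stuff_eob_tokens_for_mb(cpi, x, n);  /* hypothetical helper */
        }
      }
    }
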
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 454244457..8c48b0d83 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -67,11 +67,10 @@ void vp8_subtract_4b_c(BLOCK *be, BLOCKD *bd, int pitch) {
}
}
-void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) {
+void vp8_subtract_mbuv_s_c(short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride,
+ unsigned char *upred, unsigned char *vpred, int dst_stride) {
short *udiff = diff + 256;
short *vdiff = diff + 320;
- unsigned char *upred = pred + 256;
- unsigned char *vpred = pred + 320;
int r, c;
@@ -81,8 +80,8 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
}
udiff += 8;
- upred += 8;
- usrc += stride;
+ upred += dst_stride;
+ usrc += src_stride;
}
for (r = 0; r < 8; r++) {
@@ -91,12 +90,19 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
}
vdiff += 8;
- vpred += 8;
- vsrc += stride;
+ vpred += dst_stride;
+ vsrc += src_stride;
}
}
-void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride) {
+void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) {
+ unsigned char *upred = pred + 256;
+ unsigned char *vpred = pred + 320;
+
+ vp8_subtract_mbuv_s_c(diff, usrc, vsrc, stride, upred, vpred, 8);
+}
+
+void vp8_subtract_mby_s_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int dst_stride) {
int r, c;
for (r = 0; r < 16; r++) {
@@ -105,11 +111,16 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, in
}
diff += 16;
- pred += 16;
- src += stride;
+ pred += dst_stride;
+ src += src_stride;
}
}
+void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride)
+{
+ vp8_subtract_mby_s_c(diff, src, stride, pred, 16);
+}
+
static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) {
BLOCK *b = &x->block[0];
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 6390f3fe4..6a5bf59d5 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -23,24 +23,36 @@ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER
void vp8_cmachine_specific_config(VP8_COMP *cpi) {
#if CONFIG_RUNTIME_CPU_DETECT
cpi->rtcd.common = &cpi->common.rtcd;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.sad32x32 = vp8_sad32x32_c;
+#endif
cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c;
cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c;
cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c;
cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c;
cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.sad32x32x3 = vp8_sad32x32x3_c;
+#endif
cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_c;
cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_c;
cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_c;
cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c;
cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.sad32x32x8 = vp8_sad32x32x8_c;
+#endif
cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c;
cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c;
cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c;
cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c;
cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.sad32x32x4d = vp8_sad32x32x4d_c;
+#endif
cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c;
cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c;
cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c;
@@ -54,16 +66,34 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) {
cpi->rtcd.variance.var8x16 = vp8_variance8x16_c;
cpi->rtcd.variance.var16x8 = vp8_variance16x8_c;
cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.var32x32 = vp8_variance32x32_c;
+#endif
cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;
cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c;
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.subpixvar32x32 = vp8_sub_pixel_variance32x32_c;
+#endif
cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.halfpixvar32x32_h = vp8_variance_halfpixvar32x32_h_c;
+#endif
cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.halfpixvar32x32_v = vp8_variance_halfpixvar32x32_v_c;
+#endif
cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.halfpixvar32x32_hv = vp8_variance_halfpixvar32x32_hv_c;
+#endif
cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c;
+#if CONFIG_SUPERBLOCKS
+ cpi->rtcd.variance.subpixmse32x32 = vp8_sub_pixel_mse32x32_c;
+#endif
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index ba4cd897d..a0621b649 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -243,7 +243,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
int y_stride;
int offset;
-#if ARCH_X86 || ARCH_X86_64
+#if !CONFIG_SUPERBLOCKS && (ARCH_X86 || ARCH_X86_64)
unsigned char *y0 = *(d->base_pre) + d->pre + (bestmv->as_mv.row) * d->pre_stride + bestmv->as_mv.col;
unsigned char *y;
int buf_r1, buf_r2, buf_c1, buf_c2;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index d7a9456d1..256c70386 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -620,6 +620,42 @@ static void print_seg_map(VP8_COMP *cpi) {
fclose(statsfile);
}
+static void update_reference_segmentation_map(VP8_COMP *cpi) {
+ VP8_COMMON *cm = &cpi->common;
+ int row, col, sb_rows = (cm->mb_rows + 1) >> 1, sb_cols = (cm->mb_cols + 1) >> 1;
+ MODE_INFO *mi = cm->mi;
+ uint8_t *segmap = cpi->segmentation_map;
+ uint8_t *segcache = cm->last_frame_seg_map;
+
+ for (row = 0; row < sb_rows; row++) {
+ for (col = 0; col < sb_cols; col++) {
+ MODE_INFO *miptr = mi + col * 2;
+ uint8_t *seg = segmap + col * 2;
+ uint8_t *cache = segcache + col * 2;
+#if CONFIG_SUPERBLOCKS
+ if (miptr->mbmi.encoded_as_sb) {
+ cache[0] = cache[1] = cache[cm->mb_cols] = cache[cm->mb_cols + 1] =
+ miptr->mbmi.segment_id;
+ } else
+#endif
+ {
+ cache[0] = miptr[0].mbmi.segment_id;
+ if (!(cm->mb_cols & 1) || col < sb_cols - 1)
+ cache[1] = miptr[1].mbmi.segment_id;
+ if (!(cm->mb_rows & 1) || row < sb_rows - 1) {
+ cache[cm->mb_cols] = miptr[cm->mode_info_stride].mbmi.segment_id;
+ if (!(cm->mb_cols & 1) || col < sb_cols - 1)
+ cache[1] = miptr[1].mbmi.segment_id;
+ cache[cm->mb_cols + 1] = miptr[cm->mode_info_stride + 1].mbmi.segment_id;
+ }
+ }
+ }
+ segmap += 2 * cm->mb_cols;
+ segcache += 2 * cm->mb_cols;
+ mi += 2 * cm->mode_info_stride;
+ }
+}
+
static void set_default_lf_deltas(VP8_COMP *cpi) {
cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1;
cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;
@@ -1734,6 +1770,9 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
cm->prob_last_coded = 128;
cm->prob_gf_coded = 128;
cm->prob_intra_coded = 63;
+#if CONFIG_SUPERBLOCKS
+ cm->sb_coded = 200;
+#endif
for (i = 0; i < COMP_PRED_CONTEXTS; i++)
cm->prob_comppred[i] = 128;
@@ -1917,6 +1956,18 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) {
init_mv_ref_counts();
#endif
+#if CONFIG_SUPERBLOCKS
+ cpi->fn_ptr[BLOCK_32X32].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32);
+ cpi->fn_ptr[BLOCK_32X32].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32);
+ cpi->fn_ptr[BLOCK_32X32].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar32x32);
+ cpi->fn_ptr[BLOCK_32X32].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_h);
+ cpi->fn_ptr[BLOCK_32X32].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_v);
+ cpi->fn_ptr[BLOCK_32X32].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_hv);
+ cpi->fn_ptr[BLOCK_32X32].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x3);
+ cpi->fn_ptr[BLOCK_32X32].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x8);
+ cpi->fn_ptr[BLOCK_32X32].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x4d);
+#endif
+
cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
@@ -3616,6 +3667,10 @@ static void encode_frame_to_data_rate
cpi->dummy_packing = 0;
vp8_pack_bitstream(cpi, dest, size);
+ if (cpi->mb.e_mbd.update_mb_segmentation_map) {
+ update_reference_segmentation_map(cpi);
+ }
+
#if CONFIG_PRED_FILTER
// Select the prediction filtering mode to use for the
// next frame based on the current frame selections
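
The guards in update_reference_segmentation_map() above account for frames whose MB dimensions are odd: the last superblock row/column then only partially covers the frame, so the right and bottom MB positions exist only when the loop is not in that last SB. A sketch of the two conditions, using the variable names from the function above:

    /* Inside the double loop over superblock rows/columns: */
    int have_right  = !(cm->mb_cols & 1) || col < sb_cols - 1;  /* right MB exists  */
    int have_bottom = !(cm->mb_rows & 1) || row < sb_rows - 1;  /* bottom MB exists */
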
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index ff3a21107..7fb7dd2ff 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -359,7 +359,9 @@ enum {
BLOCK_8X8,
BLOCK_4X4,
BLOCK_16X16,
- BLOCK_MAX_SEGMENTS
+ BLOCK_MAX_SEGMENTS,
+ BLOCK_32X32 = BLOCK_MAX_SEGMENTS,
+ BLOCK_MAX_SB_SEGMENTS,
};
typedef struct VP8_COMP {
@@ -528,6 +530,10 @@ typedef struct VP8_COMP {
int cq_target_quality;
+#if CONFIG_SUPERBLOCKS
+ int sb_count;
+ int sb_ymode_count [VP8_I32X32_MODES];
+#endif
int ymode_count [VP8_YMODES]; /* intra MB type cts this frame */
int bmode_count [VP8_BINTRAMODES];
int i8x8_mode_count [VP8_I8X8_MODES];
@@ -628,7 +634,7 @@ typedef struct VP8_COMP {
vp8_full_search_fn_t full_search_sad;
vp8_refining_search_fn_t refining_search_sad;
vp8_diamond_search_fn_t diamond_search_sad;
- vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS];
+ vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SB_SEGMENTS];
uint64_t time_receive_data;
uint64_t time_compress_data;
uint64_t time_pick_lpf;
@@ -732,9 +738,6 @@ typedef struct VP8_COMP {
int droppable;
- // Global store for SB left contexts, one for each MB row in the SB
- ENTROPY_CONTEXT_PLANES left_context[2];
-
// TODO Do we still need this??
int update_context;
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index ed5b5c96d..97b02f033 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -718,7 +718,7 @@ static void macro_block_yrd(MACROBLOCK *mb,
*Rate = vp8_rdcost_mby(mb);
}
-static int vp8_rdcost_mby_8x8(MACROBLOCK *mb) {
+static int vp8_rdcost_mby_8x8(MACROBLOCK *mb, int backup) {
int cost = 0;
int b;
MACROBLOCKD *xd = &mb->e_mbd;
@@ -726,11 +726,16 @@ static int vp8_rdcost_mby_8x8(MACROBLOCK *mb) {
ENTROPY_CONTEXT *ta;
ENTROPY_CONTEXT *tl;
- vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ if (backup) {
+ vpx_memcpy(&t_above,xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+ } else {
+ ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
+ tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
+ }
for (b = 0; b < 16; b += 4)
cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_Y_NO_DC,
@@ -775,7 +780,7 @@ static void macro_block_yrd_8x8(MACROBLOCK *mb,
*Distortion = (d >> 2);
// rate
- *Rate = vp8_rdcost_mby_8x8(mb);
+ *Rate = vp8_rdcost_mby_8x8(mb, 1);
}
#if CONFIG_TX16X16
@@ -823,6 +828,66 @@ static void copy_predictor(unsigned char *dst, const unsigned char *predictor) {
d[12] = p[12];
}
+#if CONFIG_SUPERBLOCKS
+static void super_block_yrd_8x8(MACROBLOCK *x,
+ int *rate,
+ int *distortion,
+ const VP8_ENCODER_RTCD *rtcd, int *skip)
+{
+ MACROBLOCKD *const xd = &x->e_mbd;
+ BLOCK *const by2 = x->block + 24;
+ BLOCKD *const bdy2 = xd->block + 24;
+ int d = 0, r = 0, n;
+ const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer;
+ int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
+ ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
+ ENTROPY_CONTEXT_PLANES *tl = xd->left_context;
+ ENTROPY_CONTEXT_PLANES t_above[2];
+ ENTROPY_CONTEXT_PLANES t_left[2];
+ int skippable = 1;
+
+ vpx_memcpy(t_above, xd->above_context, sizeof(t_above));
+ vpx_memcpy(t_left, xd->left_context, sizeof(t_left));
+
+ for (n = 0; n < 4; n++) {
+ int x_idx = n & 1, y_idx = n >> 1;
+
+ vp8_subtract_mby_s_c(x->src_diff,
+ src + x_idx * 16 + y_idx * 16 * src_y_stride,
+ src_y_stride,
+ dst + x_idx * 16 + y_idx * 16 * dst_y_stride,
+ dst_y_stride);
+ vp8_transform_mby_8x8(x);
+ vp8_quantize_mby_8x8(x);
+
+ /* remove 1st order dc to properly combine 1st/2nd order distortion */
+ x->coeff[ 0] = 0;
+ x->coeff[ 64] = 0;
+ x->coeff[128] = 0;
+ x->coeff[192] = 0;
+ xd->dqcoeff[ 0] = 0;
+ xd->dqcoeff[ 64] = 0;
+ xd->dqcoeff[128] = 0;
+ xd->dqcoeff[192] = 0;
+
+ d += ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, 0);
+ d += ENCODEMB_INVOKE(&rtcd->encodemb, berr)(by2->coeff, bdy2->dqcoeff, 16);
+ xd->above_context = ta + x_idx;
+ xd->left_context = tl + y_idx;
+ r += vp8_rdcost_mby_8x8(x, 0);
+ skippable = skippable && mby_is_skippable_8x8(xd);
+ }
+
+ *distortion = (d >> 2);
+ *rate = r;
+ if (skip) *skip = skippable;
+ xd->above_context = ta;
+ xd->left_context = tl;
+ vpx_memcpy(xd->above_context, &t_above, sizeof(t_above));
+ vpx_memcpy(xd->left_context, &t_left, sizeof(t_left));
+}
+#endif
+
static void copy_predictor_8x8(unsigned char *dst, const unsigned char *predictor) {
const unsigned int *p = (const unsigned int *)predictor;
unsigned int *d = (unsigned int *)dst;
@@ -1062,6 +1127,45 @@ static int64_t rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rat
return RDCOST(mb->rdmult, mb->rddiv, cost, distortion);
}
+#if CONFIG_SUPERBLOCKS
+static int64_t rd_pick_intra_sby_mode(VP8_COMP *cpi,
+ MACROBLOCK *x,
+ int *rate,
+ int *rate_tokenonly,
+ int *distortion) {
+ MB_PREDICTION_MODE mode;
+ MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+ int this_rate, this_rate_tokenonly;
+ int this_distortion;
+ int64_t best_rd = INT64_MAX, this_rd;
+
+ /* Y Search for 32x32 intra prediction mode */
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ x->e_mbd.mode_info_context->mbmi.mode = mode;
+ RECON_INVOKE(&cpi->common.rtcd.recon,
+ build_intra_predictors_sby_s)(&x->e_mbd);
+
+ super_block_yrd_8x8(x, &this_rate_tokenonly,
+ &this_distortion, IF_RTCD(&cpi->rtcd), NULL);
+ this_rate = this_rate_tokenonly +
+ x->mbmode_cost[x->e_mbd.frame_type]
+ [x->e_mbd.mode_info_context->mbmi.mode];
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < best_rd) {
+ mode_selected = mode;
+ best_rd = this_rd;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ }
+ }
+
+ x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
+
+ return best_rd;
+}
+#endif
static int64_t rd_pick_intra16x16mby_mode(VP8_COMP *cpi,
MACROBLOCK *x,
@@ -1372,18 +1476,23 @@ static int64_t rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
-static int rd_cost_mbuv_8x8(MACROBLOCK *mb) {
+static int rd_cost_mbuv_8x8(MACROBLOCK *mb, int backup) {
int b;
int cost = 0;
MACROBLOCKD *xd = &mb->e_mbd;
ENTROPY_CONTEXT_PLANES t_above, t_left;
ENTROPY_CONTEXT *ta, *tl;
- vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
- vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ if (backup) {
+ vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+ vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES));
- ta = (ENTROPY_CONTEXT *)&t_above;
- tl = (ENTROPY_CONTEXT *)&t_left;
+ ta = (ENTROPY_CONTEXT *)&t_above;
+ tl = (ENTROPY_CONTEXT *)&t_left;
+ } else {
+ ta = (ENTROPY_CONTEXT *)mb->e_mbd.above_context;
+ tl = (ENTROPY_CONTEXT *)mb->e_mbd.left_context;
+ }
for (b = 16; b < 24; b += 4)
cost += cost_coeffs(mb, xd->block + b, PLANE_TYPE_UV,
@@ -1393,6 +1502,54 @@ static int rd_cost_mbuv_8x8(MACROBLOCK *mb) {
return cost;
}
+#if CONFIG_SUPERBLOCKS
+static int64_t rd_inter32x32_uv_8x8(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
+ int *distortion, int fullpixel, int *skip) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ int n, r = 0, d = 0;
+ const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
+ const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
+ int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+ int skippable = 1;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
+ ENTROPY_CONTEXT_PLANES *tl = xd->left_context;
+
+ memcpy(t_above, xd->above_context, sizeof(t_above));
+ memcpy(t_left, xd->left_context, sizeof(t_left));
+
+ for (n = 0; n < 4; n++) {
+ int x_idx = n & 1, y_idx = n >> 1;
+
+ vp8_subtract_mbuv_s_c(x->src_diff,
+ usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ src_uv_stride,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ dst_uv_stride);
+
+ vp8_transform_mbuv_8x8(x);
+ vp8_quantize_mbuv_8x8(x);
+
+ xd->above_context = ta + x_idx;
+ xd->left_context = tl + y_idx;
+ r += rd_cost_mbuv_8x8(x, 0);
+ d += ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
+ skippable = skippable && mbuv_is_skippable_8x8(xd);
+ }
+
+ *rate = r;
+ *distortion = d;
+ if (skip) *skip = skippable;
+ xd->left_context = tl;
+ xd->above_context = ta;
+ memcpy(xd->above_context, t_above, sizeof(t_above));
+ memcpy(xd->left_context, t_left, sizeof(t_left));
+
+ return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
+}
+#endif
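
rd_inter32x32_uv_8x8() visits the four 8x8 sub-blocks of the superblock's 16x16 chroma plane; n & 1 and n >> 1 decode the loop counter into the sub-block column and row, which also pick the matching above/left entropy-context slots. A self-contained sketch of that indexing follows (the stride value is illustrative):

#include <stdio.h>

/* Illustrative only: decoding a 0..3 counter into the 2x2 sub-block grid
 * walked by rd_inter32x32_uv_8x8() and super_block_uvrd_8x8(). */
int main(void) {
  const int stride = 16;  /* chroma plane stride (illustrative) */
  for (int n = 0; n < 4; n++) {
    int x_idx = n & 1;   /* 0,1,0,1: sub-block column */
    int y_idx = n >> 1;  /* 0,0,1,1: sub-block row    */
    int offset = x_idx * 8 + y_idx * 8 * stride;  /* top-left pixel of this 8x8 sub-block */
    printf("n=%d -> col %d, row %d, pixel offset %d\n", n, x_idx, y_idx, offset);
  }
  return 0;
}
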
static int64_t rd_inter16x16_uv_8x8(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
int *distortion, int fullpixel) {
@@ -1403,7 +1560,7 @@ static int64_t rd_inter16x16_uv_8x8(VP8_COMP *cpi, MACROBLOCK *x, int *rate,
vp8_quantize_mbuv_8x8(x);
- *rate = rd_cost_mbuv_8x8(x);
+ *rate = rd_cost_mbuv_8x8(x, 1);
*distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
@@ -1527,7 +1684,7 @@ static void rd_pick_intra_mbuv_mode_8x8(VP8_COMP *cpi,
vp8_quantize_mbuv_8x8(x);
- rate_to = rd_cost_mbuv_8x8(x);
+ rate_to = rd_cost_mbuv_8x8(x, 1);
rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][mbmi->uv_mode];
distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4;
@@ -1546,6 +1703,91 @@ static void rd_pick_intra_mbuv_mode_8x8(VP8_COMP *cpi,
mbmi->uv_mode = mode_selected;
}
+#if CONFIG_SUPERBLOCKS
+static void super_block_uvrd_8x8(MACROBLOCK *x,
+ int *rate,
+ int *distortion,
+ const VP8_ENCODER_RTCD *rtcd) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ int d = 0, r = 0, n;
+ const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer;
+ const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer;
+ int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
+ ENTROPY_CONTEXT_PLANES t_above[2], t_left[2];
+ ENTROPY_CONTEXT_PLANES *ta = xd->above_context;
+ ENTROPY_CONTEXT_PLANES *tl = xd->left_context;
+
+ memcpy(t_above, xd->above_context, sizeof(t_above));
+ memcpy(t_left, xd->left_context, sizeof(t_left));
+
+ for (n = 0; n < 4; n++) {
+ int x_idx = n & 1, y_idx = n >> 1;
+
+ vp8_subtract_mbuv_s_c(x->src_diff,
+ usrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride,
+ src_uv_stride,
+ udst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride,
+ dst_uv_stride);
+ vp8_transform_mbuv_8x8(x);
+ vp8_quantize_mbuv_8x8(x);
+
+ d += ENCODEMB_INVOKE(&rtcd->encodemb, mbuverr)(x) >> 2;
+ xd->above_context = ta + x_idx;
+ xd->left_context = tl + y_idx;
+ r += rd_cost_mbuv_8x8(x, 0);
+ }
+
+  xd->above_context = ta;
+  xd->left_context = tl;
+  *distortion = (d >> 2);
+  *rate = r;
+
+ memcpy(xd->above_context, t_above, sizeof(t_above));
+ memcpy(xd->left_context, t_left, sizeof(t_left));
+}
+
+static int64_t rd_pick_intra_sbuv_mode(VP8_COMP *cpi,
+ MACROBLOCK *x,
+ int *rate,
+ int *rate_tokenonly,
+ int *distortion) {
+ MB_PREDICTION_MODE mode;
+ MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected);
+ int64_t best_rd = INT64_MAX, this_rd;
+ int this_rate_tokenonly, this_rate;
+ int this_distortion;
+
+ for (mode = DC_PRED; mode <= TM_PRED; mode++) {
+ x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
+ RECON_INVOKE(&cpi->rtcd.common->recon,
+ build_intra_predictors_sbuv_s)(&x->e_mbd);
+
+ super_block_uvrd_8x8(x, &this_rate_tokenonly,
+ &this_distortion, IF_RTCD(&cpi->rtcd));
+ this_rate = this_rate_tokenonly +
+                x->intra_uv_mode_cost[x->e_mbd.frame_type]
+                                     [x->e_mbd.mode_info_context->mbmi.uv_mode];
+ this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
+
+ if (this_rd < best_rd) {
+ mode_selected = mode;
+ best_rd = this_rd;
+ *rate = this_rate;
+ *rate_tokenonly = this_rate_tokenonly;
+ *distortion = this_distortion;
+ }
+ }
+
+ x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
+
+ return best_rd;
+}
+#endif
+
int vp8_cost_mv_ref(VP8_COMP *cpi,
MB_PREDICTION_MODE m,
const int near_mv_ref_ct[4]) {
@@ -2568,25 +2810,33 @@ static void vp8_estimate_ref_frame_costs(VP8_COMP *cpi, int segment_id, unsigned
}
}
-static void store_coding_context(MACROBLOCK *x, int mb_index,
+static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int mode_index,
PARTITION_INFO *partition,
int_mv *ref_mv,
- int_mv *second_ref_mv) {
+ int_mv *second_ref_mv,
+ int single_pred_diff,
+ int comp_pred_diff,
+ int hybrid_pred_diff) {
MACROBLOCKD *xd = &x->e_mbd;
// Take a snapshot of the coding context so it can be
// restored if we decide to encode this way
- x->mb_context[mb_index].best_mode_index = mode_index;
- vpx_memcpy(&x->mb_context[mb_index].mic, xd->mode_info_context,
+ ctx->best_mode_index = mode_index;
+ vpx_memcpy(&ctx->mic, xd->mode_info_context,
sizeof(MODE_INFO));
- vpx_memcpy(&x->mb_context[mb_index].partition_info, partition,
- sizeof(PARTITION_INFO));
- x->mb_context[mb_index].best_ref_mv.as_int = ref_mv->as_int;
- x->mb_context[mb_index].second_best_ref_mv.as_int = second_ref_mv->as_int;
-
- // x->mb_context[mb_index].rddiv = x->rddiv;
- // x->mb_context[mb_index].rdmult = x->rdmult;
+ if (partition)
+ vpx_memcpy(&ctx->partition_info, partition,
+ sizeof(PARTITION_INFO));
+ ctx->best_ref_mv.as_int = ref_mv->as_int;
+ ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
+
+ // ctx[mb_index].rddiv = x->rddiv;
+ // ctx[mb_index].rdmult = x->rdmult;
+
+ ctx->single_pred_diff = single_pred_diff;
+ ctx->comp_pred_diff = comp_pred_diff;
+ ctx->hybrid_pred_diff = hybrid_pred_diff;
}
static void inter_mode_cost(VP8_COMP *cpi, MACROBLOCK *x, int this_mode,
@@ -3464,7 +3714,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
}
#endif
- if (x->skip)
+ if (x->skip && !mode_excluded)
break;
}
@@ -3557,16 +3807,36 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
}
end:
- // TODO Save these to add in only if MB coding mode is selected?
- for (i = 0; i < NB_PREDICTION_TYPES; ++i)
- cpi->rd_comp_pred_diff[i] += best_pred_diff[i];
+ store_coding_context(x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition,
+ &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],
+ &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame],
+ best_pred_diff[0], best_pred_diff[1], best_pred_diff[2]);
+}
- store_coding_context(x, xd->mb_index, best_mode_index, &best_partition,
- &frame_best_ref_mv[mbmi->ref_frame],
- &frame_best_ref_mv[mbmi->second_ref_frame]);
+#if CONFIG_SUPERBLOCKS
+void vp8_rd_pick_intra_mode_sb(VP8_COMP *cpi, MACROBLOCK *x,
+ int *returnrate,
+ int *returndist) {
+ int rate_y, rate_uv;
+ int rate_y_tokenonly, rate_uv_tokenonly;
+ int error_y, error_uv;
+ int dist_y, dist_uv;
+
+ x->e_mbd.mode_info_context->mbmi.txfm_size = TX_8X8;
+
+ error_uv = rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
+ &dist_uv);
+ error_y = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
+ &dist_y);
+
+ // TODO(rbultje): add rate_uv
+ *returnrate = rate_y;
+ *returndist = dist_y + (dist_uv >> 2);
}
+#endif
-int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) {
+void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x,
+ int *returnrate, int *returndist) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
int64_t error4x4, error16x16;
@@ -3585,6 +3855,8 @@ int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) {
int rate8x8, dist8x8;
int mode16x16;
int mode8x8[2][4];
+ int dist;
+ int rateuv8, rateuv_tokenonly8, distuv8;
mbmi->ref_frame = INTRA_FRAME;
rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
@@ -3646,9 +3918,11 @@ int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) {
rate += rate4x4;
#endif
mbmi->mode = B_PRED;
+ dist = dist4x4;
} else {
mbmi->mode = mode16x16;
rate += rate16x16;
+ dist = dist16x16;
}
} else {
if (error4x4 < error8x8) {
@@ -3663,17 +3937,727 @@ int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x) {
rate += rate4x4;
#endif
mbmi->mode = B_PRED;
+ dist = dist4x4;
} else {
mbmi->mode = I8X8_PRED;
set_i8x8_block_modes(x, mode8x8);
rate += rate8x8;
+ dist = dist8x8;
}
}
- return rate;
+
+ // TODO(rbultje): should add rateuv here also
+ *returnrate = rate - rateuv;
+ *returndist = dist + (distuv >> 2);
}
-int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
- int recon_yoffset, int recon_uvoffset) {
+#if CONFIG_SUPERBLOCKS
+int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x,
+ int recon_yoffset, int recon_uvoffset,
+ int *returnrate, int *returndistortion) {
+ VP8_COMMON *cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ BLOCK *b = &x->block[0];
+ BLOCKD *d = &xd->block[0];
+ MB_PREDICTION_MODE this_mode;
+ MV_REFERENCE_FRAME ref_frame;
+ int mis = xd->mode_info_stride;
+ unsigned char segment_id = xd->mode_info_context->mbmi.segment_id;
+ int comp_pred;
+ int_mv best_ref_mv, second_best_ref_mv;
+ int_mv mode_mv[MB_MODE_COUNT];
+ int_mv frame_nearest_mv[4];
+ int_mv frame_near_mv[4];
+ int_mv frame_best_ref_mv[4];
+ int_mv mc_search_result[4];
+ int frame_mdcounts[4][4];
+ unsigned char *y_buffer[4];
+ unsigned char *u_buffer[4];
+ unsigned char *v_buffer[4];
+ static const int flag_list[4] = { 0, VP8_LAST_FLAG, VP8_GOLD_FLAG, VP8_ALT_FLAG };
+ int idx_list[4] = { 0, cpi->common.lst_fb_idx, cpi->common.gld_fb_idx, cpi->common.alt_fb_idx };
+ int mdcounts[4];
+ int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+ int saddone = 0;
+  int sr = 0;  // search range returned by mv_pred(); expressed in step_param levels (0-7)
+ int64_t best_rd = INT64_MAX;
+ int64_t best_comp_rd = INT64_MAX;
+ int64_t best_single_rd = INT64_MAX;
+ int64_t best_hybrid_rd = INT64_MAX;
+ int64_t best_yrd = INT64_MAX;
+ MB_MODE_INFO best_mbmode;
+ int mode_index = 0;
+#if 0
+ PARTITION_INFO best_partition;
+ union b_mode_info best_bmodes[16];
+#endif
+ unsigned int ref_costs[MAX_REF_FRAMES];
+
+ xd->mode_info_context->mbmi.segment_id = segment_id;
+ vp8_estimate_ref_frame_costs(cpi, segment_id, ref_costs);
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+ if (cpi->ref_frame_flags & flag_list[ref_frame]) {
+ YV12_BUFFER_CONFIG *ref_buf = &cpi->common.yv12_fb[idx_list[ref_frame]];
+
+ vp8_find_near_mvs(xd, xd->mode_info_context,
+ xd->prev_mode_info_context,
+ &frame_nearest_mv[ref_frame], &frame_near_mv[ref_frame],
+ &frame_best_ref_mv[ref_frame], frame_mdcounts[ref_frame],
+ ref_frame, cpi->common.ref_frame_sign_bias);
+
+ y_buffer[ref_frame] = ref_buf->y_buffer + recon_yoffset;
+ u_buffer[ref_frame] = ref_buf->u_buffer + recon_uvoffset;
+ v_buffer[ref_frame] = ref_buf->v_buffer + recon_uvoffset;
+ }
+ mc_search_result[ref_frame].as_int = INVALID_MV;
+ }
+
+ for (mode_index = 0; mode_index < MAX_MODES; mode_index++) {
+ int_mv mvp;
+ int mode_excluded;
+ int64_t this_rd = INT64_MAX;
+ int disable_skip = 0;
+ int other_cost = 0;
+ int compmode_cost = 0;
+ int rate2 = 0;
+ int distortion2 = 0;
+ int rate_y = 0;
+ int rate_uv = 0;
+ int distortion_uv;
+ int distortion;
+ int skippable_y, skippable_uv;
+
+ // Test best rd so far against threshold for trying this mode.
+ if (best_rd <= cpi->rd_threshes[mode_index]) {
+ continue;
+ }
+
+ this_mode = vp8_mode_order[mode_index].mode;
+ ref_frame = vp8_mode_order[mode_index].ref_frame;
+ xd->mode_info_context->mbmi.ref_frame = ref_frame;
+ comp_pred = vp8_mode_order[mode_index].second_ref_frame != INTRA_FRAME;
+ xd->mode_info_context->mbmi.mode = this_mode;
+ xd->mode_info_context->mbmi.uv_mode = DC_PRED;
+#if 0 && CONFIG_PRED_FILTER
+ xd->mode_info_context->mbmi.pred_filter_enabled = 0;
+#endif
+
+#if 0 && CONFIG_COMP_INTRA_PRED
+ xd->mode_info_context->mbmi.second_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+ xd->mode_info_context->mbmi.second_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1);
+#endif
+
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
+ continue;
+
+    // Intra coding and SPLITMV are not yet supported for superblocks
+ // TODO(rbultje): support intra coding
+ if (ref_frame == INTRA_FRAME || this_mode == SPLITMV)
+ continue;
+
+ if (comp_pred) {
+ int second_ref;
+
+ if (ref_frame == ALTREF_FRAME) {
+ second_ref = LAST_FRAME;
+ } else {
+ second_ref = ref_frame + 1;
+ }
+ if (!(cpi->ref_frame_flags & flag_list[second_ref]))
+ continue;
+ xd->mode_info_context->mbmi.second_ref_frame = second_ref;
+
+ xd->second_pre.y_buffer = y_buffer[second_ref];
+ xd->second_pre.u_buffer = u_buffer[second_ref];
+ xd->second_pre.v_buffer = v_buffer[second_ref];
+ second_best_ref_mv = frame_best_ref_mv[second_ref];
+ mode_excluded = cm->comp_pred_mode == SINGLE_PREDICTION_ONLY;
+ } else {
+ xd->mode_info_context->mbmi.second_ref_frame = INTRA_FRAME;
+ mode_excluded = cm->comp_pred_mode == COMP_PREDICTION_ONLY;
+ }
+
+ xd->pre.y_buffer = y_buffer[ref_frame];
+ xd->pre.u_buffer = u_buffer[ref_frame];
+ xd->pre.v_buffer = v_buffer[ref_frame];
+ mode_mv[ZEROMV].as_int = 0;
+ mode_mv[NEARESTMV] = frame_nearest_mv[ref_frame];
+ mode_mv[NEARMV] = frame_near_mv[ref_frame];
+ best_ref_mv = frame_best_ref_mv[ref_frame];
+ vpx_memcpy(mdcounts, frame_mdcounts[ref_frame], sizeof(mdcounts));
+
+ // If the segment reference frame feature is enabled....
+ // then do nothing if the current ref frame is not allowed..
+ if (segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+ !check_segref(xd, segment_id, ref_frame)) {
+ continue;
+ }
+ // If the segment mode feature is enabled....
+ // then do nothing if the current mode is not allowed..
+ else if (segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
+ (this_mode != get_segdata(xd, segment_id, SEG_LVL_MODE))) {
+ continue;
+ }
+ // Disable this drop out case if either the mode or ref frame
+ // segment level feature is enabled for this segment. This is to
+ // prevent the possibility that we end up unable to pick any mode.
+ else if (!segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+ !segfeature_active(xd, segment_id, SEG_LVL_MODE)) {
+ // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
+ // unless ARNR filtering is enabled in which case we want
+ // an unfiltered alternative
+ if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
+ if (this_mode != ZEROMV || ref_frame != ALTREF_FRAME) {
+ continue;
+ }
+ }
+ }
+
+ if (!comp_pred) {
+ switch (this_mode) {
+ case NEWMV: {
+ int thissme;
+ int bestsme = INT_MAX;
+ int step_param = cpi->sf.first_step;
+ int further_steps;
+ int n;
+ int do_refine = 1; /* If last step (1-away) of n-step search doesn't pick the center point as the best match,
+ we will do a final 1-away diamond refining search */
+ int num00;
+
+ int sadpb = x->sadperbit16;
+ int_mv mvp_full;
+
+ int col_min = (best_ref_mv.as_mv.col >> 3) - MAX_FULL_PEL_VAL + ((best_ref_mv.as_mv.col & 7) ? 1 : 0);
+ int row_min = (best_ref_mv.as_mv.row >> 3) - MAX_FULL_PEL_VAL + ((best_ref_mv.as_mv.row & 7) ? 1 : 0);
+ int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL;
+ int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL;
+
+ int tmp_col_min = x->mv_col_min;
+ int tmp_col_max = x->mv_col_max;
+ int tmp_row_min = x->mv_row_min;
+ int tmp_row_max = x->mv_row_max;
+
+ if (!saddone) {
+ vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]);
+ saddone = 1;
+ }
+
+          vp8_mv_pred(cpi, xd, xd->mode_info_context, &mvp,
+ xd->mode_info_context->mbmi.ref_frame,
+ cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]);
+
+ mvp_full.as_mv.col = mvp.as_mv.col >> 3;
+ mvp_full.as_mv.row = mvp.as_mv.row >> 3;
+
+ // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
+ if (x->mv_col_min < col_min)
+ x->mv_col_min = col_min;
+ if (x->mv_col_max > col_max)
+ x->mv_col_max = col_max;
+ if (x->mv_row_min < row_min)
+ x->mv_row_min = row_min;
+ if (x->mv_row_max > row_max)
+ x->mv_row_max = row_max;
+
+ // adjust search range according to sr from mv prediction
+ if (sr > step_param)
+ step_param = sr;
+
+ // Initial step/diamond search
+ {
+ bestsme = cpi->diamond_search_sad(x, b, d, &mvp_full, &d->bmi.as_mv.first,
+ step_param, sadpb, &num00,
+ &cpi->fn_ptr[BLOCK_32X32],
+ XMVCOST, &best_ref_mv);
+ mode_mv[NEWMV].as_int = d->bmi.as_mv.first.as_int;
+
+ // Further step/diamond searches as necessary
+ n = 0;
+ further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+
+ n = num00;
+ num00 = 0;
+
+          /* If there won't be any more n-step searches, check whether a refining search is needed. */
+ if (n > further_steps)
+ do_refine = 0;
+
+ while (n < further_steps) {
+ n++;
+
+ if (num00)
+ num00--;
+ else {
+ thissme = cpi->diamond_search_sad(x, b, d, &mvp_full,
+ &d->bmi.as_mv.first, step_param + n, sadpb, &num00,
+ &cpi->fn_ptr[BLOCK_32X32],
+ XMVCOST, &best_ref_mv);
+
+ /* check to see if refining search is needed. */
+ if (num00 > (further_steps - n))
+ do_refine = 0;
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ mode_mv[NEWMV].as_int = d->bmi.as_mv.first.as_int;
+ } else {
+ d->bmi.as_mv.first.as_int = mode_mv[NEWMV].as_int;
+ }
+ }
+ }
+ }
+
+ /* final 1-away diamond refining search */
+ if (do_refine == 1) {
+ int search_range;
+
+          // This may not be a good way to set search_range; it needs further investigation.
+ // search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col));
+ search_range = 8;
+
+ thissme = cpi->refining_search_sad(x, b, d, &d->bmi.as_mv.first, sadpb,
+ search_range, &cpi->fn_ptr[BLOCK_32X32],
+ XMVCOST, &best_ref_mv);
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ mode_mv[NEWMV].as_int = d->bmi.as_mv.first.as_int;
+ } else {
+ d->bmi.as_mv.first.as_int = mode_mv[NEWMV].as_int;
+ }
+ }
+
+ x->mv_col_min = tmp_col_min;
+ x->mv_col_max = tmp_col_max;
+ x->mv_row_min = tmp_row_min;
+ x->mv_row_max = tmp_row_max;
+
+ if (bestsme < INT_MAX) {
+ int dis; /* TODO: use dis in distortion calculation later. */
+ unsigned int sse;
+ cpi->find_fractional_mv_step(x, b, d, &d->bmi.as_mv.first, &best_ref_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[BLOCK_32X32],
+ XMVCOST, &dis, &sse);
+ }
+ mc_search_result[xd->mode_info_context->mbmi.ref_frame].as_int =
+ d->bmi.as_mv.first.as_int;
+
+ mode_mv[NEWMV].as_int = d->bmi.as_mv.first.as_int;
+
+ // Add the new motion vector cost to our rolling cost variable
+ rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv,
+ XMVCOST, 96,
+ xd->allow_high_precision_mv);
+ }
+
+ case NEARESTMV:
+ case NEARMV:
+        // Clip "next_nearest" so that it does not extend too far out of the image
+ vp8_clamp_mv2(&mode_mv[this_mode], xd);
+
+        // Do not bother proceeding if the vector (from newmv, nearest or near) is 0,0, as it should then be coded using the zeromv mode.
+ if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0)) {
+ continue;
+ }
+
+ case ZEROMV:
+ // Trap vectors that reach beyond the UMV borders
+        // Note that ALL New MV, Nearest MV, Near MV and Zero MV code drops through to this point
+ // because of the lack of break statements in the previous two cases.
+ if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) ||
+ ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) {
+ continue;
+ }
+
+ vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]);
+
+#if CONFIG_PRED_FILTER
+ // Filtered prediction:
+ xd->mode_info_context->mbmi.pred_filter_enabled =
+ vp8_mode_order[mode_index].pred_filter_flag;
+ rate2 += vp8_cost_bit(cpi->common.prob_pred_filter_off,
+ xd->mode_info_context->mbmi.pred_filter_enabled);
+#endif
+
+ vp8_build_inter32x32_predictors_sb(xd,
+ xd->dst.y_buffer,
+ xd->dst.u_buffer,
+ xd->dst.v_buffer,
+ xd->dst.y_stride,
+ xd->dst.uv_stride);
+
+ compmode_cost =
+ vp8_cost_bit(get_pred_prob(cm, xd, PRED_COMP), 0);
+
+ if (cpi->active_map_enabled && x->active_ptr[0] == 0) {
+ x->skip = 1;
+ } else if (x->encode_breakout) {
+ unsigned int sse;
+ unsigned int var;
+ int threshold = (xd->block[0].dequant[1] *
+ xd->block[0].dequant[1] >> 4);
+
+ if (threshold < x->encode_breakout)
+ threshold = x->encode_breakout;
+
+ var = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32)(*(b->base_src),
+ b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse);
+
+ if (sse < threshold) {
+ unsigned int q2dc = xd->block[24].dequant[0];
+ /* If there is no codeable 2nd order dc
+               or a very small uniform pixel change */
+            if ((sse - var < q2dc * q2dc >> 4) ||
+ (sse / 2 > var && sse - var < 64)) {
+ // Check u and v to make sure skip is ok
+ int sse2, sse3;
+ int var2 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
+ (x->src.u_buffer, x->src.uv_stride,
+ xd->dst.u_buffer, xd->dst.uv_stride, &sse2);
+ int var3 = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)
+ (x->src.v_buffer, x->src.uv_stride,
+ xd->dst.v_buffer, xd->dst.uv_stride, &sse3);
+ sse2 += sse3;
+ if (sse2 * 2 < threshold) {
+ x->skip = 1;
+ distortion2 = sse + sse2;
+ rate2 = 500;
+
+ /* for best_yrd calculation */
+ rate_uv = 0;
+ distortion_uv = sse2;
+
+ disable_skip = 1;
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ break;
+ }
+ }
+ }
+ }
+
+ // Add in the Mv/mode cost
+ rate2 += vp8_cost_mv_ref(cpi, this_mode, mdcounts);
+
+ // Y cost and distortion - FIXME support other transform sizes
+ super_block_yrd_8x8(x, &rate_y, &distortion,
+ IF_RTCD(&cpi->rtcd), &skippable_y);
+ rate2 += rate_y;
+ distortion2 += distortion;
+
+ rd_inter32x32_uv_8x8(cpi, x, &rate_uv, &distortion_uv,
+ cpi->common.full_pixel, &skippable_uv);
+
+ rate2 += rate_uv;
+ distortion2 += distortion_uv;
+ mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY;
+ break;
+
+ default:
+ break;
+ }
+ } else { /* xd->mode_info_context->mbmi.second_ref_frame != 0 */
+ int ref1 = xd->mode_info_context->mbmi.ref_frame;
+ int ref2 = xd->mode_info_context->mbmi.second_ref_frame;
+
+ mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY;
+ switch (this_mode) {
+ case NEWMV:
+ if (mc_search_result[ref1].as_int == INVALID_MV ||
+ mc_search_result[ref2].as_int == INVALID_MV)
+ continue;
+ xd->mode_info_context->mbmi.mv[0].as_int = mc_search_result[ref1].as_int;
+ xd->mode_info_context->mbmi.mv[1].as_int = mc_search_result[ref2].as_int;
+ rate2 += vp8_mv_bit_cost(&mc_search_result[ref1],
+ &frame_best_ref_mv[ref1],
+ XMVCOST, 96,
+ xd->allow_high_precision_mv);
+ rate2 += vp8_mv_bit_cost(&mc_search_result[ref2],
+ &frame_best_ref_mv[ref2],
+ XMVCOST, 96,
+ xd->allow_high_precision_mv);
+ break;
+ case ZEROMV:
+ xd->mode_info_context->mbmi.mv[0].as_int = 0;
+ xd->mode_info_context->mbmi.mv[1].as_int = 0;
+ break;
+ case NEARMV:
+ if (frame_near_mv[ref1].as_int == 0 || frame_near_mv[ref2].as_int == 0) {
+ continue;
+ }
+ xd->mode_info_context->mbmi.mv[0].as_int = frame_near_mv[ref1].as_int;
+ xd->mode_info_context->mbmi.mv[1].as_int = frame_near_mv[ref2].as_int;
+ break;
+ case NEARESTMV:
+ if (frame_nearest_mv[ref1].as_int == 0 || frame_nearest_mv[ref2].as_int == 0) {
+ continue;
+ }
+ xd->mode_info_context->mbmi.mv[0].as_int = frame_nearest_mv[ref1].as_int;
+ xd->mode_info_context->mbmi.mv[1].as_int = frame_nearest_mv[ref2].as_int;
+ break;
+ default:
+ break;
+ }
+
+ /* Add in the Mv/mode cost */
+ rate2 += vp8_cost_mv_ref(cpi, this_mode, mdcounts);
+
+ vp8_clamp_mv2(&xd->mode_info_context->mbmi.mv[0], xd);
+ vp8_clamp_mv2(&xd->mode_info_context->mbmi.mv[1], xd);
+ if (((xd->mode_info_context->mbmi.mv[0].as_mv.row >> 3) < x->mv_row_min) ||
+ ((xd->mode_info_context->mbmi.mv[0].as_mv.row >> 3) > x->mv_row_max) ||
+ ((xd->mode_info_context->mbmi.mv[0].as_mv.col >> 3) < x->mv_col_min) ||
+ ((xd->mode_info_context->mbmi.mv[0].as_mv.col >> 3) > x->mv_col_max) ||
+ ((xd->mode_info_context->mbmi.mv[1].as_mv.row >> 3) < x->mv_row_min) ||
+ ((xd->mode_info_context->mbmi.mv[1].as_mv.row >> 3) > x->mv_row_max) ||
+ ((xd->mode_info_context->mbmi.mv[1].as_mv.col >> 3) < x->mv_col_min) ||
+ ((xd->mode_info_context->mbmi.mv[1].as_mv.col >> 3) > x->mv_col_max)) {
+ continue;
+ }
+
+ /* build first and second prediction */
+ vp8_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer,
+ xd->dst.u_buffer, xd->dst.v_buffer,
+ xd->dst.y_stride, xd->dst.uv_stride);
+
+ /* Y cost and distortion - TODO(rbultje) support other transform sizes */
+ super_block_yrd_8x8(x, &rate_y, &distortion,
+ IF_RTCD(&cpi->rtcd), &skippable_y);
+
+ rate2 += rate_y;
+ distortion2 += distortion;
+
+ /* UV cost and distortion */
+ rd_inter32x32_uv_8x8(cpi, x, &rate_uv, &distortion_uv,
+ cpi->common.full_pixel, &skippable_uv);
+
+ rate2 += rate_uv;
+ distortion2 += distortion_uv;
+
+ /* don't bother w/ skip, we would never have come here if skip were
+ * enabled */
+ xd->mode_info_context->mbmi.mode = this_mode;
+
+ /* We don't include the cost of the second reference here, because there
+ * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in
+ * other words if you present them in that order, the second one is
+ * always known if the first is known */
+ compmode_cost = vp8_cost_bit(get_pred_prob(cm, xd, PRED_COMP), 1);
+ }
+
+ if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+ rate2 += compmode_cost;
+ }
+
+ // Estimate the reference frame signaling cost and add it
+ // to the rolling cost variable.
+ rate2 += ref_costs[xd->mode_info_context->mbmi.ref_frame];
+
+ if (!disable_skip) {
+      // Test for the condition where the skip block will be activated
+      // because there are no non-zero coefficients, and make any
+      // necessary adjustment for rate. Ignore if skip is coded at
+      // segment level, as the cost won't have been added in.
+ if (cpi->common.mb_no_coeff_skip) {
+ int mb_skippable = skippable_y && skippable_uv;
+ int mb_skip_allowed;
+
+ // Is Mb level skip allowed for this mb.
+ mb_skip_allowed =
+ !segfeature_active(xd, segment_id, SEG_LVL_EOB) ||
+ get_segdata(xd, segment_id, SEG_LVL_EOB);
+
+ if (mb_skippable) {
+ // Back out the coefficient coding costs
+ rate2 -= (rate_y + rate_uv);
+ // for best_yrd calculation
+ rate_uv = 0;
+
+ if (mb_skip_allowed) {
+ int prob_skip_cost;
+
+ // Cost the skip mb case
+ vp8_prob skip_prob =
+ get_pred_prob(cm, xd, PRED_MBSKIP);
+
+ if (skip_prob) {
+ prob_skip_cost = vp8_cost_bit(skip_prob, 1);
+ rate2 += prob_skip_cost;
+ other_cost += prob_skip_cost;
+ }
+ }
+ }
+ // Add in the cost of the no skip flag.
+ else if (mb_skip_allowed) {
+ int prob_skip_cost = vp8_cost_bit(get_pred_prob(cm, xd,
+ PRED_MBSKIP), 0);
+ rate2 += prob_skip_cost;
+ other_cost += prob_skip_cost;
+ }
+ }
+
+ // Calculate the final RD estimate for this mode.
+ this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
+ }
+
+#if 0
+ // Keep record of best intra distortion
+ if ((xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) &&
+ (this_rd < best_intra_rd)) {
+ best_intra_rd = this_rd;
+ *returnintra = distortion2;
+ }
+#endif
+
+ if (!disable_skip && xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+ if (this_rd < best_comp_rd)
+ best_comp_rd = this_rd;
+ if (this_rd < best_single_rd)
+ best_single_rd = this_rd;
+ if (this_rd < best_hybrid_rd)
+ best_hybrid_rd = this_rd;
+ }
+
+    // Did this mode help, i.e. is it the new best mode?
+ if (this_rd < best_rd || x->skip) {
+ if (!mode_excluded) {
+#if 0
+ // Note index of best mode so far
+ best_mode_index = mode_index;
+
+ if (this_mode <= B_PRED) {
+ xd->mode_info_context->mbmi.uv_mode = uv_intra_mode_8x8;
+ /* required for left and above block mv */
+ xd->mode_info_context->mbmi.mv.as_int = 0;
+ }
+#endif
+
+ other_cost += ref_costs[xd->mode_info_context->mbmi.ref_frame];
+
+ /* Calculate the final y RD estimate for this mode */
+ best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost),
+ (distortion2 - distortion_uv));
+
+ *returnrate = rate2;
+ *returndistortion = distortion2;
+ best_rd = this_rd;
+ vpx_memcpy(&best_mbmode, &xd->mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+ }
+#if 0
+ // Testing this mode gave rise to an improvement in best error score. Lower threshold a bit for next time
+ cpi->rd_thresh_mult[mode_index] = (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT;
+ cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+#endif
+ }
+ // If the mode did not help improve the best error case then raise the threshold for testing that mode next time around.
+ else {
+#if 0
+ cpi->rd_thresh_mult[mode_index] += 4;
+
+ if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT)
+ cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT;
+
+ cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * cpi->rd_thresh_mult[mode_index];
+#endif
+ }
+
+ /* keep record of best compound/single-only prediction */
+ if (!disable_skip && xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
+ int single_rd, hybrid_rd, single_rate, hybrid_rate;
+
+ if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) {
+ single_rate = rate2 - compmode_cost;
+ hybrid_rate = rate2;
+ } else {
+ single_rate = rate2;
+ hybrid_rate = rate2 + compmode_cost;
+ }
+
+ single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
+ hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
+
+ if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME &&
+ single_rd < best_single_rd) {
+ best_single_rd = single_rd;
+ } else if (xd->mode_info_context->mbmi.second_ref_frame != INTRA_FRAME &&
+ single_rd < best_comp_rd) {
+ best_comp_rd = single_rd;
+ }
+ if (hybrid_rd < best_hybrid_rd) {
+ best_hybrid_rd = hybrid_rd;
+ }
+ }
+
+ if (x->skip && !mode_excluded)
+ break;
+ }
+
+ // TODO(rbultje) integrate with RD thresholding
+#if 0
+ // Reduce the activation RD thresholds for the best choice mode
+ if ((cpi->rd_baseline_thresh[best_mode_index] > 0) &&
+ (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) {
+ int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2);
+
+ cpi->rd_thresh_mult[best_mode_index] =
+ (cpi->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ?
+ cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT;
+ cpi->rd_threshes[best_mode_index] =
+ (cpi->rd_baseline_thresh[best_mode_index] >> 7) * cpi->rd_thresh_mult[best_mode_index];
+ }
+#endif
+
+  // This code forces Altref,0,0 and skip for the frame that overlays an
+  // altref, unless the altref is filtered. However, this is unsafe if
+  // segment-level coding of ref frame or mode is enabled for this
+  // segment.
+ if (!segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) &&
+ !segfeature_active(xd, segment_id, SEG_LVL_MODE) &&
+ cpi->is_src_frame_alt_ref &&
+ (cpi->oxcf.arnr_max_frames == 0) &&
+ (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) {
+ xd->mode_info_context->mbmi.mode = ZEROMV;
+ xd->mode_info_context->mbmi.ref_frame = ALTREF_FRAME;
+ xd->mode_info_context->mbmi.mv[0].as_int = 0;
+ xd->mode_info_context->mbmi.uv_mode = DC_PRED;
+ xd->mode_info_context->mbmi.mb_skip_coeff =
+ (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+ xd->mode_info_context->mbmi.partitioning = 0;
+
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+
+ if (best_rd != INT64_MAX)
+ store_coding_context(x, &x->sb_context[0], mode_index, NULL,
+ &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],
+ &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame],
+ 0, 0, 0);
+ return best_rd;
+ }
+
+ // macroblock modes
+ vpx_memcpy(&xd->mode_info_context->mbmi, &best_mbmode,
+ sizeof(MB_MODE_INFO));
+ xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+
+ if (best_rd != INT64_MAX)
+ store_coding_context(x, &x->sb_context[0], mode_index, NULL,
+ &frame_best_ref_mv[xd->mode_info_context->mbmi.ref_frame],
+ &frame_best_ref_mv[xd->mode_info_context->mbmi.second_ref_frame],
+ (best_single_rd == INT64_MAX) ? INT_MIN : (best_rd - best_single_rd),
+ (best_comp_rd == INT64_MAX) ? INT_MIN : (best_rd - best_comp_rd),
+ (best_hybrid_rd == INT64_MAX) ? INT_MIN : (best_rd - best_hybrid_rd));
+
+ return best_rd;
+}
+#endif
+
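store_coding_context() is handed, per prediction-type restriction (single, compound, hybrid), the gap between the overall best RD and the best RD achieved under that restriction, with INT_MIN standing in when no candidate of that type was evaluated (its best_*_rd is still INT64_MAX). A small sketch of that bookkeeping, using an illustrative helper name:

#include <limits.h>
#include <stdint.h>

/* Illustrative only: the single/comp/hybrid RD-difference bookkeeping
 * passed into store_coding_context() above. */
static int pred_type_diff(int64_t best_rd, int64_t best_rd_for_type) {
  if (best_rd_for_type == INT64_MAX)
    return INT_MIN;                       /* no candidate of this type was tried */
  return (int)(best_rd - best_rd_for_type);
}
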
+void vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
+ int recon_yoffset,
+ int recon_uvoffset,
+ int *totalrate, int *totaldist) {
VP8_COMMON *cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
@@ -3694,17 +4678,6 @@ int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
&distortion, &intra_error);
- if (mbmi->ref_frame) {
- unsigned char pred_context;
-
- pred_context = get_pred_context(cm, xd, PRED_COMP);
-
- if (mbmi->second_ref_frame == INTRA_FRAME)
- cpi->single_pred_count[pred_context]++;
- else
- cpi->comp_pred_count[pred_context]++;
- }
-
/* restore cpi->zbin_mode_boost_enabled */
cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
}
@@ -3717,5 +4690,6 @@ int vp8cx_pick_mode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
x->mb_context[xd->mb_index].distortion = distortion;
x->mb_context[xd->mb_index].intra_error = intra_error;
- return rate;
+ *totalrate = rate;
+ *totaldist = distortion;
}
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index 2b5928de9..0e36a519d 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -18,7 +18,8 @@
extern void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue);
extern void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset,
int *returnrate, int *returndistortion, int64_t *returnintra);
-extern int vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x);
+extern void vp8_rd_pick_intra_mode(VP8_COMP *cpi, MACROBLOCK *x, int *r, int *d);
+extern void vp8_rd_pick_intra_mode_sb(VP8_COMP *cpi, MACROBLOCK *x, int *r, int *d);
extern void vp8_mv_pred
(
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index 78a87f392..4fdfd1186 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -13,29 +13,6 @@
#include "vpx_ports/config.h"
#include "vpx/vpx_integer.h"
-unsigned int vp8_sad16x16_c(
- const unsigned char *src_ptr,
- int src_stride,
- const unsigned char *ref_ptr,
- int ref_stride,
- int max_sad) {
-
- int r, c;
- unsigned int sad = 0;
-
- for (r = 0; r < 16; r++) {
- for (c = 0; c < 16; c++) {
- sad += abs(src_ptr[c] - ref_ptr[c]);
- }
-
- src_ptr += src_stride;
- ref_ptr += ref_stride;
- }
-
- return sad;
-}
-
-
static __inline
unsigned int sad_mx_n_c(
const unsigned char *src_ptr,
@@ -60,6 +37,21 @@ unsigned int sad_mx_n_c(
return sad;
}
+unsigned int vp8_sad32x32_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32);
+}
+
+unsigned int vp8_sad16x16_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ int max_sad) {
+ return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16);
+}
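
Both wrappers now delegate to the shared sad_mx_n_c() helper; the max_sad argument is kept for interface compatibility, but the C reference path shown here does not use it for early termination. For reference, a minimal standalone sketch of an m x n sum of absolute differences (not the exact helper):

#include <stdlib.h>  /* abs() */

/* Illustrative only: m x n sum of absolute differences between two blocks. */
static unsigned int sad_sketch(const unsigned char *src, int src_stride,
                               const unsigned char *ref, int ref_stride,
                               int m, int n) {
  unsigned int sad = 0;
  for (int r = 0; r < n; r++) {
    for (int c = 0; c < m; c++)
      sad += abs(src[c] - ref[c]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}
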
unsigned int vp8_sad8x8_c(
const unsigned char *src_ptr,
@@ -104,6 +96,7 @@ unsigned int vp8_sad4x4_c(
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4);
}
+
#if CONFIG_NEWBESTREFMV
unsigned int vp8_sad2x16_c(
const unsigned char *src_ptr,
@@ -122,6 +115,34 @@ unsigned int vp8_sad16x2_c(
return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 16, 2);
}
#endif
+
+void vp8_sad32x32x3_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned int *sad_array
+ ) {
+ sad_array[0] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+}
+
+void vp8_sad32x32x8_c(const unsigned char *src_ptr,
+ int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride,
+ unsigned short *sad_array
+ ) {
+ sad_array[0] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr, ref_stride, 0x7fffffff);
+ sad_array[1] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+ sad_array[2] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+ sad_array[3] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 3, ref_stride, 0x7fffffff);
+ sad_array[4] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+ sad_array[5] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+ sad_array[6] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 6, ref_stride, 0x7fffffff);
+ sad_array[7] = (unsigned short)vp8_sad32x32_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
void vp8_sad16x16x3_c(
const unsigned char *src_ptr,
int src_stride,
@@ -267,6 +288,18 @@ void vp8_sad4x4x8_c(
sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
}
+void vp8_sad32x32x4d_c(const unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr[],
+ int ref_stride,
+ unsigned int *sad_array
+ ) {
+ sad_array[0] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr[0], ref_stride, 0x7fffffff);
+ sad_array[1] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr[1], ref_stride, 0x7fffffff);
+ sad_array[2] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr[2], ref_stride, 0x7fffffff);
+ sad_array[3] = vp8_sad32x32_c(src_ptr, src_stride, ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
void vp8_sad16x16x4d_c(
const unsigned char *src_ptr,
int src_stride,
diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c
index e9d02cdd4..e88b80d34 100644
--- a/vp8/encoder/segmentation.c
+++ b/vp8/encoder/segmentation.c
@@ -200,42 +200,59 @@ void choose_segmap_coding_method(VP8_COMP *cpi) {
// in the frame
xd->mode_info_context = cm->mi;
- for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
- for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) {
- segment_id = xd->mode_info_context->mbmi.segment_id;
-
- // Count the number of hits on each segment with no prediction
- no_pred_segcounts[segment_id]++;
-
- // Temporal prediction not allowed on key frames
- if (cm->frame_type != KEY_FRAME) {
- // Test to see if the segment id matches the predicted value.
- int seg_predicted =
- (segment_id == get_pred_mb_segid(cm, segmap_index));
-
- // Get the segment id prediction context
- pred_context =
- get_pred_context(cm, xd, PRED_SEG_ID);
-
- // Store the prediction status for this mb and update counts
- // as appropriate
- set_pred_flag(xd, PRED_SEG_ID, seg_predicted);
- temporal_predictor_count[pred_context][seg_predicted]++;
-
- if (!seg_predicted)
- // Update the "unpredicted" segment count
- t_unpred_seg_counts[segment_id]++;
- }
+ for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) {
+ for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 2) {
+ for (i = 0; i < 4; i++) {
+ static const int dx[4] = { +1, -1, +1, +1 };
+ static const int dy[4] = { 0, +1, 0, -1 };
+ int x_idx = i & 1, y_idx = i >> 1;
+
+ if (mb_col + x_idx >= cm->mb_cols ||
+ mb_row + y_idx >= cm->mb_rows) {
+ goto end;
+ }
+
+ segmap_index = (mb_row + y_idx) * cm->mb_cols + mb_col + x_idx;
+ segment_id = xd->mode_info_context->mbmi.segment_id;
+
+ // Count the number of hits on each segment with no prediction
+ no_pred_segcounts[segment_id]++;
+
+ // Temporal prediction not allowed on key frames
+ if (cm->frame_type != KEY_FRAME) {
+ // Test to see if the segment id matches the predicted value.
+ int seg_predicted =
+ (segment_id == get_pred_mb_segid(cm, segmap_index));
- // Step on to the next mb
- xd->mode_info_context++;
+ // Get the segment id prediction context
+ pred_context =
+ get_pred_context(cm, xd, PRED_SEG_ID);
- // Step on to the next entry in the segment maps
- segmap_index++;
+ // Store the prediction status for this mb and update counts
+ // as appropriate
+ set_pred_flag(xd, PRED_SEG_ID, seg_predicted);
+ temporal_predictor_count[pred_context][seg_predicted]++;
+
+ if (!seg_predicted)
+ // Update the "unpredicted" segment count
+ t_unpred_seg_counts[segment_id]++;
+ }
+
+#if CONFIG_SUPERBLOCKS
+ if (xd->mode_info_context->mbmi.encoded_as_sb) {
+ assert(!i);
+ xd->mode_info_context += 2;
+ break;
+ }
+#endif
+ end:
+ xd->mode_info_context += dx[i] + dy[i] * cm->mode_info_stride;
+ }
}
// this is to account for the border in mode_info_context
- xd->mode_info_context++;
+ xd->mode_info_context -= mb_col;
+ xd->mode_info_context += cm->mode_info_stride * 2;
}
// Work out probability tree for coding segments without prediction
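
The dx/dy tables above walk the four macroblocks of each 2x2 superblock in the order top-left, top-right, bottom-left, bottom-right, and the final step lands on the top-left macroblock of the next superblock (two columns over). A standalone sketch of that pointer walk (the stride value is illustrative):

#include <stdio.h>

/* Illustrative only: the mode_info_context walk used when gathering
 * segment-map statistics per 2x2 superblock. */
int main(void) {
  static const int dx[4] = { +1, -1, +1, +1 };
  static const int dy[4] = {  0, +1,  0, -1 };
  const int stride = 8;  /* mode_info stride (illustrative) */
  int offset = 0;        /* top-left MB of the current superblock */
  for (int i = 0; i < 4; i++) {
    printf("visit MB at offset %d\n", offset);              /* prints 0, 1, 8, 9 */
    offset += dx[i] + dy[i] * stride;
  }
  printf("next superblock starts at offset %d\n", offset);  /* prints 2 */
  return 0;
}
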
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index da83d1261..a2fadfc4c 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -145,8 +145,18 @@ extern prototype_sad(vp8_variance_sad16x8);
#endif
extern prototype_sad(vp8_variance_sad16x16);
+#ifndef vp8_variance_sad32x32
+#define vp8_variance_sad32x32 vp8_sad32x32_c
+#endif
+extern prototype_sad(vp8_variance_sad32x32);
+
// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+#ifndef vp8_variance_sad32x32x3
+#define vp8_variance_sad32x32x3 vp8_sad32x32x3_c
+#endif
+extern prototype_sad_multi_same_address(vp8_variance_sad32x32x3);
+
#ifndef vp8_variance_sad16x16x3
#define vp8_variance_sad16x16x3 vp8_sad16x16x3_c
#endif
@@ -172,6 +182,11 @@ extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3);
#endif
extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3);
+#ifndef vp8_variance_sad32x32x8
+#define vp8_variance_sad32x32x8 vp8_sad32x32x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad32x32x8);
+
#ifndef vp8_variance_sad16x16x8
#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c
#endif
@@ -199,6 +214,11 @@ extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8);
// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+#ifndef vp8_variance_sad32x32x4d
+#define vp8_variance_sad32x32x4d vp8_sad32x32x4d_c
+#endif
+extern prototype_sad_multi_dif_address(vp8_variance_sad32x32x4d);
+
#ifndef vp8_variance_sad16x16x4d
#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_c
#endif
@@ -258,6 +278,11 @@ extern prototype_variance(vp8_variance_var16x8);
#endif
extern prototype_variance(vp8_variance_var16x16);
+#ifndef vp8_variance_var32x32
+#define vp8_variance_var32x32 vp8_variance32x32_c
+#endif
+extern prototype_variance(vp8_variance_var32x32);
+
// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#ifndef vp8_variance_subpixvar4x4
@@ -285,26 +310,51 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
#endif
extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
+#ifndef vp8_variance_subpixvar32x32
+#define vp8_variance_subpixvar32x32 vp8_sub_pixel_variance32x32_c
+#endif
+extern prototype_subpixvariance(vp8_variance_subpixvar32x32);
+
#ifndef vp8_variance_halfpixvar16x16_h
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
#endif
extern prototype_variance(vp8_variance_halfpixvar16x16_h);
+#ifndef vp8_variance_halfpixvar32x32_h
+#define vp8_variance_halfpixvar32x32_h vp8_variance_halfpixvar32x32_h_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar32x32_h);
+
#ifndef vp8_variance_halfpixvar16x16_v
#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c
#endif
extern prototype_variance(vp8_variance_halfpixvar16x16_v);
+#ifndef vp8_variance_halfpixvar32x32_v
+#define vp8_variance_halfpixvar32x32_v vp8_variance_halfpixvar32x32_v_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar32x32_v);
+
#ifndef vp8_variance_halfpixvar16x16_hv
#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c
#endif
extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
+#ifndef vp8_variance_halfpixvar32x32_hv
+#define vp8_variance_halfpixvar32x32_hv vp8_variance_halfpixvar32x32_hv_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar32x32_hv);
+
#ifndef vp8_variance_subpixmse16x16
#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c
#endif
extern prototype_subpixvariance(vp8_variance_subpixmse16x16);
+#ifndef vp8_variance_subpixmse32x32
+#define vp8_variance_subpixmse32x32 vp8_sub_pixel_mse32x32_c
+#endif
+extern prototype_subpixvariance(vp8_variance_subpixmse32x32);
+
// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#ifndef vp8_variance_getmbss
@@ -349,38 +399,66 @@ typedef struct {
vp8_sad_fn_t sad8x16;
vp8_sad_fn_t sad16x8;
vp8_sad_fn_t sad16x16;
+#if CONFIG_SUPERBLOCKS
+ vp8_sad_fn_t sad32x32;
+#endif
vp8_variance_fn_t var4x4;
vp8_variance_fn_t var8x8;
vp8_variance_fn_t var8x16;
vp8_variance_fn_t var16x8;
vp8_variance_fn_t var16x16;
+#if CONFIG_SUPERBLOCKS
+ vp8_variance_fn_t var32x32;
+#endif
vp8_subpixvariance_fn_t subpixvar4x4;
vp8_subpixvariance_fn_t subpixvar8x8;
vp8_subpixvariance_fn_t subpixvar8x16;
vp8_subpixvariance_fn_t subpixvar16x8;
vp8_subpixvariance_fn_t subpixvar16x16;
+#if CONFIG_SUPERBLOCKS
+ vp8_subpixvariance_fn_t subpixvar32x32;
+#endif
vp8_variance_fn_t halfpixvar16x16_h;
+ vp8_variance_fn_t halfpixvar32x32_h;
vp8_variance_fn_t halfpixvar16x16_v;
+#if CONFIG_SUPERBLOCKS
+ vp8_variance_fn_t halfpixvar32x32_v;
+#endif
vp8_variance_fn_t halfpixvar16x16_hv;
+#if CONFIG_SUPERBLOCKS
+ vp8_variance_fn_t halfpixvar32x32_hv;
+#endif
vp8_subpixvariance_fn_t subpixmse16x16;
+#if CONFIG_SUPERBLOCKS
+ vp8_subpixvariance_fn_t subpixmse32x32;
+#endif
vp8_getmbss_fn_t getmbss;
vp8_variance_fn_t mse16x16;
+#if CONFIG_SUPERBLOCKS
+ vp8_sad_multi_fn_t sad32x32x3;
+#endif
vp8_sad_multi_fn_t sad16x16x3;
vp8_sad_multi_fn_t sad16x8x3;
vp8_sad_multi_fn_t sad8x16x3;
vp8_sad_multi_fn_t sad8x8x3;
vp8_sad_multi_fn_t sad4x4x3;
+#if CONFIG_SUPERBLOCKS
+ vp8_sad_multi1_fn_t sad32x32x8;
+#endif
vp8_sad_multi1_fn_t sad16x16x8;
vp8_sad_multi1_fn_t sad16x8x8;
vp8_sad_multi1_fn_t sad8x16x8;
vp8_sad_multi1_fn_t sad8x8x8;
vp8_sad_multi1_fn_t sad4x4x8;
+#if CONFIG_SUPERBLOCKS
+ vp8_sad_multi_d_fn_t sad32x32x4d;
+#endif
vp8_sad_multi_d_fn_t sad16x16x4d;
vp8_sad_multi_d_fn_t sad16x8x4d;
vp8_sad_multi_d_fn_t sad8x16x4d;
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 0b9d569b0..cbe2a51d6 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -55,6 +55,20 @@ static void variance(
}
}
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_variance32x32_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg);
+ *sse = var;
+ return (var - ((avg * avg) >> 10));
+}
+#endif
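
In vp8_variance32x32_c the helper fills var with the sum of squared pixel differences and avg with the plain sum of differences, so the >> 10 applies the usual identity Var = SSE - sum^2 / N with N = 32 * 32 = 1024. A sketch of just that arithmetic (the 64-bit intermediate for the squared sum and the names are illustrative):

#include <stdint.h>

/* Illustrative only: the block-variance identity used above for 32x32.
 * sse = sum of squared differences, sum = sum of differences, N = 1024. */
static unsigned int variance32x32_sketch(unsigned int sse, int sum) {
  return sse - (unsigned int)(((int64_t)sum * sum) >> 10);  /* sse - sum*sum/1024 */
}
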
unsigned int vp8_variance16x16_c(
const unsigned char *src_ptr,
@@ -334,6 +348,27 @@ unsigned int vp8_sub_pixel_variance16x16_c
return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_sub_pixel_variance32x32_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+  unsigned short FData3[33 * 32];  // Temp data buffer used in filtering
+ unsigned char temp2[36 * 32];
+ const short *HFilter, *VFilter;
+
+ HFilter = vp8_bilinear_filters[xoffset];
+ VFilter = vp8_bilinear_filters[yoffset];
+
+ var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 33, 32, HFilter);
+ var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter);
+
+ return vp8_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
+}
+#endif
unsigned int vp8_variance_halfpixvar16x16_h_c(
const unsigned char *src_ptr,
@@ -345,17 +380,38 @@ unsigned int vp8_variance_halfpixvar16x16_h_c(
ref_ptr, recon_stride, sse);
}
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_variance_halfpixvar32x32_h_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp8_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
+ ref_ptr, recon_stride, sse);
+}
+#endif
+
-unsigned int vp8_variance_halfpixvar16x16_v_c(
+unsigned int vp8_variance_halfpixvar16x16_v_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
+ ref_ptr, recon_stride, sse);
+}
+
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_variance_halfpixvar32x32_v_c(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
unsigned int *sse) {
- return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
+ return vp8_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
ref_ptr, recon_stride, sse);
}
-
+#endif
unsigned int vp8_variance_halfpixvar16x16_hv_c(
const unsigned char *src_ptr,
@@ -367,6 +423,16 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c(
ref_ptr, recon_stride, sse);
}
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_variance_halfpixvar32x32_hv_c(const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ return vp8_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
+ ref_ptr, recon_stride, sse);
+}
+#endif
unsigned int vp8_sub_pixel_mse16x16_c
(
@@ -382,6 +448,19 @@ unsigned int vp8_sub_pixel_mse16x16_c
return *sse;
}
+#if CONFIG_SUPERBLOCKS
+unsigned int vp8_sub_pixel_mse32x32_c(const unsigned char *src_ptr,
+ int src_pixels_per_line,
+ int xoffset,
+ int yoffset,
+ const unsigned char *dst_ptr,
+ int dst_pixels_per_line,
+ unsigned int *sse) {
+ vp8_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
+ return *sse;
+}
+#endif
+
unsigned int vp8_sub_pixel_variance16x8_c
(
const unsigned char *src_ptr,