diff options
Diffstat (limited to 'vp9/encoder')
26 files changed, 1537 insertions, 5141 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 3ab67cd8c..6624f07eb 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -281,10 +281,6 @@ static void sb_kfwrite_ymode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_uv_mode_tree, p, vp9_sb_kf_ymode_encodings + m); } -static void write_i8x8_mode(vp9_writer *bc, int m, const vp9_prob *p) { - write_token(bc, vp9_i8x8_mode_tree, p, vp9_i8x8_mode_encodings + m); -} - static void write_uv_mode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_uv_mode_tree, p, vp9_uv_mode_encodings + m); } @@ -302,10 +298,6 @@ static void write_kf_bmode(vp9_writer *bc, int m, const vp9_prob *p) { write_token(bc, vp9_kf_bmode_tree, p, vp9_kf_bmode_encodings + m); } -static void write_split(vp9_writer *bc, int x, const vp9_prob *p) { - write_token(bc, vp9_mbsplit_tree, p, vp9_mbsplit_encodings + x); -} - static int prob_update_savings(const unsigned int *ct, const vp9_prob oldp, const vp9_prob newp, const vp9_prob upd) { @@ -508,60 +500,9 @@ static void write_nmv(VP9_COMP *cpi, vp9_writer *bc, // It should only be called if a segment map update is indicated. static void write_mb_segid(vp9_writer *bc, const MB_MODE_INFO *mi, const MACROBLOCKD *xd) { - // Encode the MB segment id. - int seg_id = mi->segment_id; - - if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { - switch (seg_id) { - case 0: - vp9_write(bc, 0, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[1]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[2]); - break; - case 1: - vp9_write(bc, 0, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[1]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[2]); - break; - case 2: - vp9_write(bc, 0, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[1]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[3]); - break; - case 3: - vp9_write(bc, 0, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[1]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[3]); - break; - case 4: - vp9_write(bc, 1, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[4]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[5]); - break; - case 5: - vp9_write(bc, 1, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[4]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[5]); - break; - case 6: - vp9_write(bc, 1, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[4]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[6]); - break; - case 7: - vp9_write(bc, 1, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[4]); - vp9_write(bc, 1, xd->mb_segment_tree_probs[6]); - break; - - // TRAP.. This should not happen - default: - vp9_write(bc, 0, xd->mb_segment_tree_probs[0]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[1]); - vp9_write(bc, 0, xd->mb_segment_tree_probs[2]); - break; - } - } + if (xd->segmentation_enabled && xd->update_mb_segmentation_map) + treed_write(bc, vp9_segment_tree, xd->mb_segment_tree_probs, + mi->segment_id, 3); } // This function encodes the reference frame @@ -718,7 +659,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, active_section = 6; #endif - if (m->mbmi.sb_type > BLOCK_SIZE_MB16X16) + if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8) write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob); else write_ymode(bc, mode, pc->fc.ymode_prob); @@ -728,21 +669,10 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, do { write_bmode(bc, m->bmi[j].as_mode.first, pc->fc.bmode_prob); - } while (++j < 16); - } - if (mode == I8X8_PRED) { - write_i8x8_mode(bc, m->bmi[0].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[2].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[8].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[10].as_mode.first, - pc->fc.i8x8_mode_prob); - } else { - write_uv_mode(bc, mi->uv_mode, - pc->fc.uv_mode_prob[mode]); + } while (++j < 4); } + write_uv_mode(bc, mi->uv_mode, + pc->fc.uv_mode_prob[mode]); } else { vp9_prob mv_ref_p[VP9_MVREFS - 1]; @@ -754,7 +684,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, // If segment skip is not enabled code the mode. if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) { - if (mi->sb_type > BLOCK_SIZE_MB16X16) { + if (mi->sb_type > BLOCK_SIZE_SB8X8) { write_sb_mv_ref(bc, mode, mv_ref_p); } else { write_mv_ref(bc, mode, mv_ref_p); @@ -824,26 +754,16 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, ++count_mb_seg[mi->partitioning]; #endif - write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob); - cpi->mbsplit_count[mi->partitioning]++; - do { B_PREDICTION_MODE blockmode; int_mv blockmv; - const int *const L = vp9_mbsplits[mi->partitioning]; int k = -1; /* first block in subset j */ int mv_contz; int_mv leftmv, abovemv; blockmode = cpi->mb.partition_info->bmi[j].mode; blockmv = cpi->mb.partition_info->bmi[j].mv; -#if CONFIG_DEBUG - while (j != L[++k]) - if (k >= 16) - assert(0); -#else - while (j != L[++k]); -#endif + k = j; leftmv.as_int = left_block_mv(xd, m, k); abovemv.as_int = above_block_mv(m, k, mis); mv_contz = vp9_mv_cont(&leftmv, &abovemv); @@ -875,16 +795,15 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, } } - if (((rf == INTRA_FRAME && mode <= I8X8_PRED) || - (rf != INTRA_FRAME && !(mode == SPLITMV && - mi->partitioning == PARTITIONING_4X4))) && + if (((rf == INTRA_FRAME && mode != I4X4_PRED) || + (rf != INTRA_FRAME && mode != SPLITMV)) && pc->txfm_mode == TX_MODE_SELECT && - !(skip_coeff || vp9_segfeature_active(xd, segment_id, - SEG_LVL_SKIP))) { + !(skip_coeff || vp9_segfeature_active(xd, segment_id, + SEG_LVL_SKIP))) { TX_SIZE sz = mi->txfm_size; // FIXME(rbultje) code ternary symbol once all experiments are merged vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]); - if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) { + if (mi->sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) { vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]); if (mi->sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8) vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]); @@ -912,7 +831,7 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, vp9_write(bc, skip_coeff, vp9_get_pred_prob(c, xd, PRED_MBSKIP)); } - if (m->mbmi.sb_type > BLOCK_SIZE_MB16X16) + if (m->mbmi.sb_type > BLOCK_SIZE_SB8X8) sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); else kfwrite_ymode(bc, ym, c->kf_ymode_prob[c->kf_ymode_probs_index]); @@ -921,35 +840,26 @@ static void write_mb_modes_kf(const VP9_COMP *cpi, int i = 0; do { const B_PREDICTION_MODE a = above_block_mode(m, i, mis); - const B_PREDICTION_MODE l = (xd->left_available || (i & 3)) ? + const B_PREDICTION_MODE l = (xd->left_available || + (i & 1)) ? left_block_mode(m, i) : B_DC_PRED; const int bm = m->bmi[i].as_mode.first; #ifdef ENTROPY_STATS ++intra_mode_stats [A] [L] [bm]; #endif - write_kf_bmode(bc, bm, c->kf_bmode_prob[a][l]); - } while (++i < 16); + } while (++i < 4); } - if (ym == I8X8_PRED) { - write_i8x8_mode(bc, m->bmi[0].as_mode.first, c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[0].as_mode.first); fflush(stdout); - write_i8x8_mode(bc, m->bmi[2].as_mode.first, c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[2].as_mode.first); fflush(stdout); - write_i8x8_mode(bc, m->bmi[8].as_mode.first, c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[8].as_mode.first); fflush(stdout); - write_i8x8_mode(bc, m->bmi[10].as_mode.first, c->fc.i8x8_mode_prob); - // printf(" mode: %d\n", m->bmi[10].as_mode.first); fflush(stdout); - } else - write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); - if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT && + write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); + + if (ym != I4X4_PRED && c->txfm_mode == TX_MODE_SELECT && !(skip_coeff || vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { TX_SIZE sz = m->mbmi.txfm_size; // FIXME(rbultje) code ternary symbol once all experiments are merged vp9_write(bc, sz != TX_4X4, c->prob_tx[0]); - if (sz != TX_4X4 && ym <= TM_PRED) { + if (m->mbmi.sb_type >= BLOCK_SIZE_MB16X16 && sz != TX_4X4) { vp9_write(bc, sz != TX_8X8, c->prob_tx[1]); if (m->mbmi.sb_type >= BLOCK_SIZE_SB32X32 && sz != TX_8X8) vp9_write(bc, sz != TX_16X16, c->prob_tx[2]); @@ -1162,45 +1072,34 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, else assert(0); - if (bsize > BLOCK_SIZE_MB16X16) { + if (bsize > BLOCK_SIZE_SB8X8) { int pl; xd->left_seg_context = - cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3); - xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8); + cm->left_seg_context + ((mi_row >> 1) & 3); + xd->above_seg_context = cm->above_seg_context + (mi_col >> 1); pl = partition_plane_context(xd, bsize); // encode the partition information write_token(bc, vp9_partition_tree, cm->fc.partition_prob[pl], vp9_partition_encodings + partition); } + subsize = get_subsize(bsize, partition); + switch (partition) { case PARTITION_NONE: - subsize = bsize; write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col); break; case PARTITION_HORZ: - subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB64X32 : - BLOCK_SIZE_SB32X16; write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col); if ((mi_row + bh) < cm->mi_rows) write_modes_b(cpi, m + bh * mis, bc, tok, tok_end, mi_row + bh, mi_col); break; case PARTITION_VERT: - subsize = (bsize == BLOCK_SIZE_SB64X64) ? BLOCK_SIZE_SB32X64 : - BLOCK_SIZE_SB16X32; write_modes_b(cpi, m, bc, tok, tok_end, mi_row, mi_col); if ((mi_col + bw) < cm->mi_cols) write_modes_b(cpi, m + bw, bc, tok, tok_end, mi_row, mi_col + bw); break; case PARTITION_SPLIT: - // TODO(jingning): support recursive partitioning down to 16x16 as for - // now. need to merge in 16x8, 8x16, 8x8, and smaller partitions. - if (bsize == BLOCK_SIZE_SB64X64) - subsize = BLOCK_SIZE_SB32X32; - else if (bsize == BLOCK_SIZE_SB32X32) - subsize = BLOCK_SIZE_MB16X16; - else - assert(0); for (n = 0; n < 4; n++) { int j = n >> 1, i = n & 0x01; write_modes_sb(cpi, m + j * bs * mis + i * bs, bc, tok, tok_end, @@ -1212,11 +1111,11 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, } // update partition context - if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_SB32X32)) + if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16)) return; - xd->left_seg_context = cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3); - xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8); + xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3); + xd->above_seg_context = cm->above_seg_context + (mi_col >> 1); update_partition_context(xd, subsize, bsize); } @@ -1233,12 +1132,12 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc, for (mi_row = c->cur_tile_mi_row_start; mi_row < c->cur_tile_mi_row_end; - mi_row += (4 << CONFIG_SB8X8), m_ptr += (4 << CONFIG_SB8X8) * mis) { + mi_row += 8, m_ptr += 8 * mis) { m = m_ptr; vpx_memset(c->left_seg_context, 0, sizeof(c->left_seg_context)); for (mi_col = c->cur_tile_mi_col_start; mi_col < c->cur_tile_mi_col_end; - mi_col += (4 << CONFIG_SB8X8), m += (4 << CONFIG_SB8X8)) + mi_col += 8, m += 8) write_modes_sb(cpi, m, bc, tok, tok_end, mi_row, mi_col, BLOCK_SIZE_SB64X64); } @@ -1719,16 +1618,147 @@ static void segment_reference_frames(VP9_COMP *cpi) { } } -void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, - unsigned long *size) { +static void encode_loopfilter(MACROBLOCKD *xd, vp9_writer *w) { + int i; + + // Write out loop filter deltas applied at the MB level based on mode or + // ref frame (if they are enabled). + vp9_write_bit(w, xd->mode_ref_lf_delta_enabled); + + if (xd->mode_ref_lf_delta_enabled) { + // Do the deltas need to be updated + vp9_write_bit(w, xd->mode_ref_lf_delta_update); + if (xd->mode_ref_lf_delta_update) { + // Send update + for (i = 0; i < MAX_REF_LF_DELTAS; i++) { + const int delta = xd->ref_lf_deltas[i]; + + // Frame level data + if (delta != xd->last_ref_lf_deltas[i]) { + xd->last_ref_lf_deltas[i] = delta; + vp9_write_bit(w, 1); + + if (delta > 0) { + vp9_write_literal(w, delta & 0x3F, 6); + vp9_write_bit(w, 0); // sign + } else { + assert(delta < 0); + vp9_write_literal(w, (-delta) & 0x3F, 6); + vp9_write_bit(w, 1); // sign + } + } else { + vp9_write_bit(w, 0); + } + } + + // Send update + for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { + const int delta = xd->mode_lf_deltas[i]; + if (delta != xd->last_mode_lf_deltas[i]) { + xd->last_mode_lf_deltas[i] = delta; + vp9_write_bit(w, 1); + + if (delta > 0) { + vp9_write_literal(w, delta & 0x3F, 6); + vp9_write_bit(w, 0); // sign + } else { + assert(delta < 0); + vp9_write_literal(w, (-delta) & 0x3F, 6); + vp9_write_bit(w, 1); // sign + } + } else { + vp9_write_bit(w, 0); + } + } + } + } +} + +static void encode_segmentation(VP9_COMP *cpi, vp9_writer *w) { int i, j; + VP9_COMMON *const pc = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + + vp9_write_bit(w, xd->segmentation_enabled); + if (!xd->segmentation_enabled) + return; + + // Segmentation map + vp9_write_bit(w, xd->update_mb_segmentation_map); +#if CONFIG_IMPLICIT_SEGMENTATION + vp9_write_bit(w, xd->allow_implicit_segment_update); +#endif + if (xd->update_mb_segmentation_map) { + // Select the coding strategy (temporal or spatial) + vp9_choose_segmap_coding_method(cpi); + // Write out probabilities used to decode unpredicted macro-block segments + for (i = 0; i < MB_SEG_TREE_PROBS; i++) { + const int prob = xd->mb_segment_tree_probs[i]; + if (prob != MAX_PROB) { + vp9_write_bit(w, 1); + vp9_write_prob(w, prob); + } else { + vp9_write_bit(w, 0); + } + } + + // Write out the chosen coding method. + vp9_write_bit(w, pc->temporal_update); + if (pc->temporal_update) { + for (i = 0; i < PREDICTION_PROBS; i++) { + const int prob = pc->segment_pred_probs[i]; + if (prob != MAX_PROB) { + vp9_write_bit(w, 1); + vp9_write_prob(w, prob); + } else { + vp9_write_bit(w, 0); + } + } + } + } + + // Segmentation data + vp9_write_bit(w, xd->update_mb_segmentation_data); + // segment_reference_frames(cpi); + if (xd->update_mb_segmentation_data) { + vp9_write_bit(w, xd->mb_segment_abs_delta); + + for (i = 0; i < MAX_MB_SEGMENTS; i++) { + for (j = 0; j < SEG_LVL_MAX; j++) { + const int data = vp9_get_segdata(xd, i, j); + const int data_max = vp9_seg_feature_data_max(j); + + if (vp9_segfeature_active(xd, i, j)) { + vp9_write_bit(w, 1); + + if (vp9_is_segfeature_signed(j)) { + if (data < 0) { + vp9_encode_unsigned_max(w, -data, data_max); + vp9_write_bit(w, 1); + } else { + vp9_encode_unsigned_max(w, data, data_max); + vp9_write_bit(w, 0); + } + } else { + vp9_encode_unsigned_max(w, data, data_max); + } + } else { + vp9_write_bit(w, 0); + } + } + } + } +} + +void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, unsigned long *size) { + int i; VP9_HEADER oh; VP9_COMMON *const pc = &cpi->common; vp9_writer header_bc, residual_bc; MACROBLOCKD *const xd = &cpi->mb.e_mbd; int extra_bytes_packed = 0; - unsigned char *cx_data = dest; + uint8_t *cx_data = dest; oh.show_frame = (int) pc->show_frame; oh.type = (int)pc->frame_type; @@ -1798,60 +1828,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, } #endif - // Write out loop filter deltas applied at the MB level based on mode or ref frame (if they are enabled). - vp9_write_bit(&header_bc, (xd->mode_ref_lf_delta_enabled) ? 1 : 0); - - if (xd->mode_ref_lf_delta_enabled) { - // Do the deltas need to be updated - vp9_write_bit(&header_bc, xd->mode_ref_lf_delta_update); - if (xd->mode_ref_lf_delta_update) { - // Send update - for (i = 0; i < MAX_REF_LF_DELTAS; i++) { - const int delta = xd->ref_lf_deltas[i]; - - // Frame level data - if (delta != xd->last_ref_lf_deltas[i]) { - xd->last_ref_lf_deltas[i] = delta; - vp9_write_bit(&header_bc, 1); - - if (delta > 0) { - vp9_write_literal(&header_bc, delta & 0x3F, 6); - vp9_write_bit(&header_bc, 0); // sign - } else { - assert(delta < 0); - vp9_write_literal(&header_bc, (-delta) & 0x3F, 6); - vp9_write_bit(&header_bc, 1); // sign - } - } else { - vp9_write_bit(&header_bc, 0); - } - } - - // Send update - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { - const int delta = xd->mode_lf_deltas[i]; - - if (delta != xd->last_mode_lf_deltas[i]) { - xd->last_mode_lf_deltas[i] = delta; - vp9_write_bit(&header_bc, 1); - - if (delta > 0) { - vp9_write_literal(&header_bc, delta & 0x3F, 6); - vp9_write_bit(&header_bc, 0); // sign - } else { - assert(delta < 0); - vp9_write_literal(&header_bc, (-delta) & 0x3F, 6); - vp9_write_bit(&header_bc, 1); // sign - } - } else { - vp9_write_bit(&header_bc, 0); - } - } - } - } - - // TODO(jkoleszar): remove these unused bits - vp9_write_literal(&header_bc, 0, 2); + encode_loopfilter(xd, &header_bc); // Frame Q baseline quantizer index vp9_write_literal(&header_bc, pc->base_qindex, QINDEX_BITS); @@ -1904,9 +1881,10 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, vp9_write_literal(&header_bc, cpi->gld_fb_idx, NUM_REF_FRAMES_LG2); vp9_write_literal(&header_bc, cpi->alt_fb_idx, NUM_REF_FRAMES_LG2); - // Indicate reference frame sign bias for Golden and ARF frames (always 0 for last frame buffer) - vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]); - vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[ALTREF_FRAME]); + // Indicate the sign bias for each reference frame buffer. + for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) { + vp9_write_bit(&header_bc, pc->ref_frame_sign_bias[LAST_FRAME + i]); + } // Signal whether to allow high MV precision vp9_write_bit(&header_bc, (xd->allow_high_precision_mv) ? 1 : 0); @@ -1960,87 +1938,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, active_section = 7; #endif - // Signal whether or not Segmentation is enabled - vp9_write_bit(&header_bc, (xd->segmentation_enabled) ? 1 : 0); - - // Indicate which features are enabled - if (xd->segmentation_enabled) { - // Indicate whether or not the segmentation map is being updated. - vp9_write_bit(&header_bc, (xd->update_mb_segmentation_map) ? 1 : 0); -#if CONFIG_IMPLICIT_SEGMENTATION - vp9_write_bit(&header_bc, (xd->allow_implicit_segment_update) ? 1 : 0); -#endif - - // If it is, then indicate the method that will be used. - if (xd->update_mb_segmentation_map) { - // Select the coding strategy (temporal or spatial) - vp9_choose_segmap_coding_method(cpi); - // Send the tree probabilities used to decode unpredicted - // macro-block segments - for (i = 0; i < MB_SEG_TREE_PROBS; i++) { - const int prob = xd->mb_segment_tree_probs[i]; - if (prob != 255) { - vp9_write_bit(&header_bc, 1); - vp9_write_prob(&header_bc, prob); - } else { - vp9_write_bit(&header_bc, 0); - } - } - - // Write out the chosen coding method. - vp9_write_bit(&header_bc, (pc->temporal_update) ? 1 : 0); - if (pc->temporal_update) { - for (i = 0; i < PREDICTION_PROBS; i++) { - const int prob = pc->segment_pred_probs[i]; - if (prob != 255) { - vp9_write_bit(&header_bc, 1); - vp9_write_prob(&header_bc, prob); - } else { - vp9_write_bit(&header_bc, 0); - } - } - } - } - - vp9_write_bit(&header_bc, (xd->update_mb_segmentation_data) ? 1 : 0); - - // segment_reference_frames(cpi); - - if (xd->update_mb_segmentation_data) { - vp9_write_bit(&header_bc, (xd->mb_segment_abs_delta) ? 1 : 0); - - // For each segments id... - for (i = 0; i < MAX_MB_SEGMENTS; i++) { - // For each segmentation codable feature... - for (j = 0; j < SEG_LVL_MAX; j++) { - const int8_t data = vp9_get_segdata(xd, i, j); - const int data_max = vp9_seg_feature_data_max(j); - - // If the feature is enabled... - if (vp9_segfeature_active(xd, i, j)) { - vp9_write_bit(&header_bc, 1); - - // Is the segment data signed.. - if (vp9_is_segfeature_signed(j)) { - // Encode the relevant feature data - if (data < 0) { - vp9_encode_unsigned_max(&header_bc, -data, data_max); - vp9_write_bit(&header_bc, 1); - } else { - vp9_encode_unsigned_max(&header_bc, data, data_max); - vp9_write_bit(&header_bc, 0); - } - } else { - // Unsigned data element so no sign bit needed - vp9_encode_unsigned_max(&header_bc, data, data_max); - } - } else { - vp9_write_bit(&header_bc, 0); - } - } - } - } - } + encode_segmentation(cpi, &header_bc); // Encode the common prediction model status flag probability updates for // the reference frame @@ -2153,15 +2051,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, vp9_copy(cpi->common.fc.pre_uv_mode_prob, cpi->common.fc.uv_mode_prob); vp9_copy(cpi->common.fc.pre_bmode_prob, cpi->common.fc.bmode_prob); vp9_copy(cpi->common.fc.pre_sub_mv_ref_prob, cpi->common.fc.sub_mv_ref_prob); - vp9_copy(cpi->common.fc.pre_mbsplit_prob, cpi->common.fc.mbsplit_prob); - vp9_copy(cpi->common.fc.pre_i8x8_mode_prob, cpi->common.fc.i8x8_mode_prob); vp9_copy(cpi->common.fc.pre_partition_prob, cpi->common.fc.partition_prob); cpi->common.fc.pre_nmvc = cpi->common.fc.nmvc; #if CONFIG_COMP_INTERINTRA_PRED cpi->common.fc.pre_interintra_prob = cpi->common.fc.interintra_prob; #endif vp9_zero(cpi->sub_mv_ref_count); - vp9_zero(cpi->mbsplit_count); vp9_zero(cpi->common.fc.mv_ref_ct); update_coef_probs(cpi, &header_bc); @@ -2173,9 +2068,6 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, active_section = 2; #endif - // TODO(jkoleszar): remove this unused bit - vp9_write_bit(&header_bc, 1); - vp9_update_skip_probs(cpi); for (i = 0; i < MBSKIP_CONTEXTS; ++i) { vp9_write_prob(&header_bc, pc->mbskip_pred_probs[i]); diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 2c06457e7..6bc42c7ff 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -29,7 +29,7 @@ typedef struct { B_PREDICTION_MODE mode; int_mv mv; int_mv second_mv; - } bmi[16]; + } bmi[4]; } PARTITION_INFO; // Structure to hold snapshot of coding context during the mode picking process @@ -117,7 +117,6 @@ struct macroblock { int mbmode_cost[2][MB_MODE_COUNT]; int intra_uv_mode_cost[2][MB_MODE_COUNT]; int bmode_costs[VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES][VP9_KF_BINTRAMODES]; - int i8x8_mode_costs[MB_MODE_COUNT]; int inter_bmode_costs[B_MODE_COUNT]; int switchable_interp_costs[VP9_SWITCHABLE_FILTERS + 1] [VP9_SWITCHABLE_FILTERS]; @@ -141,6 +140,9 @@ struct macroblock { // Structure to hold context for each of the 4 MBs within a SB: // when encoded as 4 independent MBs: + PICK_MODE_CONTEXT sb8_context[4][4][4]; + PICK_MODE_CONTEXT sb8x16_context[4][4][2]; + PICK_MODE_CONTEXT sb16x8_context[4][4][2]; PICK_MODE_CONTEXT mb_context[4][4]; PICK_MODE_CONTEXT sb32x16_context[4][2]; PICK_MODE_CONTEXT sb16x32_context[4][2]; @@ -157,12 +159,6 @@ struct macroblock { void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, int y_blocks); - void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2, - int y_blocks); - void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, - int y_blocks); - void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, - int y_blocks); }; #endif // VP9_ENCODER_VP9_BLOCK_H_ diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 52065df52..6366d382f 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -20,7 +20,6 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/encoder/vp9_segmentation.h" -#include "vp9/common/vp9_setupintrarecon.h" #include "vp9/encoder/vp9_encodeintra.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_invtrans.h" @@ -47,9 +46,6 @@ int enc_debug = 0; void vp9_select_interp_filter_type(VP9_COMP *cpi); -static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, - int output_enabled, int mi_row, int mi_col); - static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize); @@ -380,6 +376,8 @@ static void update_state(VP9_COMP *cpi, } } if (bsize < BLOCK_SIZE_SB32X32) { + if (bsize < BLOCK_SIZE_MB16X16) + ctx->txfm_rd_diff[ALLOW_16X16] = ctx->txfm_rd_diff[ALLOW_8X8]; ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16]; } @@ -387,19 +385,10 @@ static void update_state(VP9_COMP *cpi, vpx_memcpy(x->partition_info, &ctx->partition_info, sizeof(PARTITION_INFO)); - mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int; - mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int; -#if CONFIG_SB8X8 - vpx_memcpy(x->partition_info + mis, &ctx->partition_info, - sizeof(PARTITION_INFO)); - vpx_memcpy(x->partition_info + 1, &ctx->partition_info, - sizeof(PARTITION_INFO)); - vpx_memcpy(x->partition_info + mis + 1, &ctx->partition_info, - sizeof(PARTITION_INFO)); - xd->mode_info_context[1].mbmi = - xd->mode_info_context[mis].mbmi = - xd->mode_info_context[1 + mis].mbmi = *mbmi; -#endif + mbmi->mv[0].as_int = + x->partition_info->bmi[3].mv.as_int; + mbmi->mv[1].as_int = + x->partition_info->bmi[3].second_mv.as_int; } x->skip = ctx->skip; @@ -453,7 +442,6 @@ static void update_state(VP9_COMP *cpi, THR_D27_PRED /*D27_PRED*/, THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, - THR_I8X8_PRED /*I8X8_PRED*/, THR_B_PRED /*I4X4_PRED*/, }; cpi->mode_chosen_counts[kf_mode_index[mb_mode]]++; @@ -491,11 +479,13 @@ static void update_state(VP9_COMP *cpi, mbmi->best_mv.as_int = best_mv.as_int; mbmi->best_second_mv.as_int = best_second_mv.as_int; vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv); -#if CONFIG_SB8X8 - xd->mode_info_context[1].mbmi = - xd->mode_info_context[mis].mbmi = - xd->mode_info_context[1 + mis].mbmi = *mbmi; -#endif + } + + if (bsize > BLOCK_SIZE_SB8X8 && mbmi->mode == NEWMV) { + int i, j; + for (j = 0; j < bh; ++j) + for (i = 0; i < bw; ++i) + xd->mode_info_context[mis * j + i].mbmi = *mbmi; } #if CONFIG_COMP_INTERINTRA_PRED if (mbmi->mode >= NEARESTMV && mbmi->mode < SPLITMV && @@ -567,8 +557,8 @@ static INLINE void set_partition_seg_context(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - xd->above_seg_context = cm->above_seg_context + (mi_col >> CONFIG_SB8X8); - xd->left_seg_context = cm->left_seg_context + ((mi_row >> CONFIG_SB8X8) & 3); + xd->above_seg_context = cm->above_seg_context + (mi_col >> 1); + xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3); } static void set_offsets(VP9_COMP *cpi, @@ -580,17 +570,17 @@ static void set_offsets(VP9_COMP *cpi, const int dst_fb_idx = cm->new_fb_idx; const int idx_str = xd->mode_info_stride * mi_row + mi_col; const int bw = 1 << mi_width_log2(bsize), bh = 1 << mi_height_log2(bsize); - const int mb_row = mi_row >> CONFIG_SB8X8; - const int mb_col = mi_col >> CONFIG_SB8X8; + const int mb_row = mi_row >> 1; + const int mb_col = mi_col >> 1; const int idx_map = mb_row * cm->mb_cols + mb_col; int i; // entropy context structures for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].above_context = cm->above_context[i] + - (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[i].subsampling_x)); + (mi_col * 2 >> xd->plane[i].subsampling_x); xd->plane[i].left_context = cm->left_context[i] + - (((mi_row * 4 >> CONFIG_SB8X8) & 15) >> xd->plane[i].subsampling_y); + (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y); } // partition contexts @@ -631,13 +621,11 @@ static void set_offsets(VP9_COMP *cpi, /* segment ID */ if (xd->segmentation_enabled) { - if (xd->update_mb_segmentation_map) { - mbmi->segment_id = find_seg_id(cpi->segmentation_map, bsize, - mi_row, cm->mi_rows, mi_col, cm->mi_cols); - } else { - mbmi->segment_id = find_seg_id(cm->last_frame_seg_map, bsize, - mi_row, cm->mi_rows, mi_col, cm->mi_cols); - } + uint8_t *map = xd->update_mb_segmentation_map ? cpi->segmentation_map + : cm->last_frame_seg_map; + mbmi->segment_id = find_seg_id(map, bsize, mi_row, + cm->mi_rows, mi_col, cm->mi_cols); + assert(mbmi->segment_id <= (MAX_MB_SEGMENTS-1)); vp9_mb_init_quantizer(cpi, x); @@ -655,9 +643,9 @@ static void set_offsets(VP9_COMP *cpi, const int p16 = ((mb_row & 1) << 1) + (mb_col & 1); const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1); const int tile_progress = - cm->cur_tile_mi_col_start * cm->mb_rows >> CONFIG_SB8X8; + cm->cur_tile_mi_col_start * cm->mb_rows >> 1; const int mb_cols = - (cm->cur_tile_mi_col_end - cm->cur_tile_mi_col_start) >> CONFIG_SB8X8; + (cm->cur_tile_mi_col_end - cm->cur_tile_mi_col_start) >> 1; cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress) << 16) / cm->MBs; @@ -667,47 +655,6 @@ static void set_offsets(VP9_COMP *cpi, } } -static int pick_mb_mode(VP9_COMP *cpi, - int mi_row, - int mi_col, - TOKENEXTRA **tp, - int *totalrate, - int *totaldist) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - int splitmodes_used = 0; - MB_MODE_INFO *mbmi; - - set_offsets(cpi, mi_row, mi_col, BLOCK_SIZE_MB16X16); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); - - mbmi = &xd->mode_info_context->mbmi; - mbmi->sb_type = BLOCK_SIZE_MB16X16; - - // Find best coding mode & reconstruct the MB so it is available - // as a predictor for MBs that follow in the SB - if (cm->frame_type == KEY_FRAME) { - vp9_rd_pick_intra_mode(cpi, x, totalrate, totaldist); - - // Save the coding context - vpx_memcpy(&x->mb_context[xd->sb_index][xd->mb_index].mic, - xd->mode_info_context, sizeof(MODE_INFO)); - } else { - vp9_pick_mode_inter_macroblock(cpi, x, mi_row, mi_col, - totalrate, totaldist); - splitmodes_used += (mbmi->mode == SPLITMV); - - if (cpi->mb.e_mbd.segmentation_enabled && mbmi->segment_id == 0) { - cpi->seg0_idx++; - } - } - - return splitmodes_used; -} - static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col, TOKENEXTRA **tp, int *totalrate, int *totaldist, BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx) { @@ -790,11 +737,10 @@ static void set_block_index(MACROBLOCKD *xd, int idx, BLOCK_SIZE_TYPE bsize) { if (bsize >= BLOCK_SIZE_SB32X32) { xd->sb_index = idx; - } else { -#if CONFIG_SB8X8 - assert(bsize >= BLOCK_SIZE_MB16X16); -#endif + } else if (bsize >= BLOCK_SIZE_MB16X16) { xd->mb_index = idx; + } else { + xd->b_index = idx; } } @@ -817,6 +763,12 @@ static PICK_MODE_CONTEXT *get_block_context(MACROBLOCK *x, return &x->sb16x32_context[xd->sb_index][xd->mb_index]; case BLOCK_SIZE_MB16X16: return &x->mb_context[xd->sb_index][xd->mb_index]; + case BLOCK_SIZE_SB16X8: + return &x->sb16x8_context[xd->sb_index][xd->mb_index][xd->b_index]; + case BLOCK_SIZE_SB8X16: + return &x->sb8x16_context[xd->sb_index][xd->mb_index][xd->b_index]; + case BLOCK_SIZE_SB8X8: + return &x->sb8_context[xd->sb_index][xd->mb_index][xd->b_index]; default: assert(0); return NULL; @@ -837,14 +789,7 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, set_block_index(xd, sub_index, bsize); set_offsets(cpi, mi_row, mi_col, bsize); update_state(cpi, get_block_context(x, bsize), bsize, output_enabled); - if (bsize == BLOCK_SIZE_MB16X16) { - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); - - encode_macroblock(cpi, tp, output_enabled, mi_row, mi_col); - } else { - encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); - } + encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize); if (output_enabled) { update_stats(cpi, mi_row, mi_col); @@ -857,22 +802,26 @@ static void encode_b(VP9_COMP *cpi, TOKENEXTRA **tp, static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE_TYPE level, - BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4]) { + BLOCK_SIZE_TYPE c1, BLOCK_SIZE_TYPE c2[4], + BLOCK_SIZE_TYPE c3[4][4] + ) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = mi_width_log2(level), bs = 1 << (bsl - 1); const int bwl = mi_width_log2(c1), bhl = mi_height_log2(c1); - int pl; + int UNINITIALIZED_IS_SAFE(pl); if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - set_partition_seg_context(cpi, mi_row, mi_col); - pl = partition_plane_context(xd, level); + if (level > BLOCK_SIZE_SB8X8) { + set_partition_seg_context(cpi, mi_row, mi_col); + pl = partition_plane_context(xd, level); + } if (bsl == bwl && bsl == bhl) { - if (output_enabled && level > BLOCK_SIZE_MB16X16) + if (output_enabled && level > BLOCK_SIZE_SB8X8) cpi->partition_count[pl][PARTITION_NONE]++; encode_b(cpi, tp, mi_row, mi_col, output_enabled, c1, -1); } else if (bsl == bhl && bsl > bwl) { @@ -892,9 +841,11 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, assert(bwl < bsl && bhl < bsl); if (level == BLOCK_SIZE_SB64X64) { subsize = BLOCK_SIZE_SB32X32; - } else { - assert(level == BLOCK_SIZE_SB32X32); + } else if (level == BLOCK_SIZE_SB32X32) { subsize = BLOCK_SIZE_MB16X16; + } else { + assert(level == BLOCK_SIZE_MB16X16); + subsize = BLOCK_SIZE_SB8X8; } if (output_enabled) @@ -906,12 +857,12 @@ static void encode_sb(VP9_COMP *cpi, TOKENEXTRA **tp, set_block_index(xd, i, subsize); encode_sb(cpi, tp, mi_row + y_idx * bs, mi_col + x_idx * bs, output_enabled, subsize, - subsize == BLOCK_SIZE_MB16X16 ? c1 : c2[i], c2); + c2 ? c2[i] : c1, c3 ? c3[i] : NULL, NULL); } } - if (level > BLOCK_SIZE_MB16X16 && - (level == BLOCK_SIZE_SB32X32 || bsl == bwl || bsl == bhl)) { + if (level > BLOCK_SIZE_SB8X8 && + (level == BLOCK_SIZE_MB16X16 || bsl == bwl || bsl == bhl)) { set_partition_seg_context(cpi, mi_row, mi_col); update_partition_context(xd, c1, level); } @@ -932,9 +883,11 @@ static void encode_sb_row(VP9_COMP *cpi, // Code each SB in the row for (mi_col = cm->cur_tile_mi_col_start; - mi_col < cm->cur_tile_mi_col_end; mi_col += (4 << CONFIG_SB8X8)) { + mi_col < cm->cur_tile_mi_col_end; mi_col += 8) { int i, p; + BLOCK_SIZE_TYPE mb_partitioning[4][4]; BLOCK_SIZE_TYPE sb_partitioning[4]; + BLOCK_SIZE_TYPE sb64_partitioning = BLOCK_SIZE_SB32X32; int sb64_rate = 0, sb64_dist = 0; int sb64_skip = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; @@ -943,23 +896,27 @@ static void encode_sb_row(VP9_COMP *cpi, for (p = 0; p < MAX_MB_PLANE; p++) { memcpy(a + 16 * p, cm->above_context[p] + - (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)), + (mi_col * 2 >> xd->plane[p].subsampling_x), sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x); memcpy(l + 16 * p, cm->left_context[p], sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y); } - memcpy(&seg_a, cm->above_seg_context + (mi_col >> CONFIG_SB8X8), + memcpy(&seg_a, cm->above_seg_context + (mi_col >> 1), sizeof(seg_a)); memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l)); + + // FIXME(rbultje): this function should probably be rewritten to be + // recursive at some point in the future. for (i = 0; i < 4; i++) { - const int x_idx = (i & 1) << (1 + CONFIG_SB8X8); - const int y_idx = (i & 2) << CONFIG_SB8X8; + const int x_idx = (i & 1) << 2; + const int y_idx = (i & 2) << 1; int sb32_rate = 0, sb32_dist = 0; int splitmodes_used = 0; int sb32_skip = 0; int j; ENTROPY_CONTEXT l2[8 * MAX_MB_PLANE], a2[8 * MAX_MB_PLANE]; + sb_partitioning[i] = BLOCK_SIZE_MB16X16; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; @@ -969,22 +926,23 @@ static void encode_sb_row(VP9_COMP *cpi, for (p = 0; p < MAX_MB_PLANE; p++) { vpx_memcpy(l2 + 8 * p, cm->left_context[p] + - (y_idx * 4 >> (CONFIG_SB8X8 + - xd->plane[p].subsampling_y)), + (y_idx * 2 >> xd->plane[p].subsampling_y), sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y); vpx_memcpy(a2 + 8 * p, cm->above_context[p] + - ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 + - xd->plane[p].subsampling_x)), + ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x), sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); } /* Encode MBs in raster order within the SB */ - sb_partitioning[i] = BLOCK_SIZE_MB16X16; for (j = 0; j < 4; j++) { - const int x_idx_m = x_idx + ((j & 1) << CONFIG_SB8X8); - const int y_idx_m = y_idx + ((j >> 1) << CONFIG_SB8X8); + const int x_idx_m = x_idx + ((j & 1) << 1); + const int y_idx_m = y_idx + ((j >> 1) << 1); int r, d; + int r2, d2, mb16_rate = 0, mb16_dist = 0, k; + ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE]; + + mb_partitioning[i][j] = BLOCK_SIZE_SB8X8; if (mi_row + y_idx_m >= cm->mi_rows || mi_col + x_idx_m >= cm->mi_cols) { @@ -995,30 +953,165 @@ static void encode_sb_row(VP9_COMP *cpi, // Index of the MB in the SB 0..3 xd->mb_index = j; - splitmodes_used += pick_mb_mode(cpi, mi_row + y_idx_m, - mi_col + x_idx_m, tp, &r, &d); - sb32_rate += r; - sb32_dist += d; + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(l3 + 4 * p, + cm->left_context[p] + + (y_idx_m * 2 >> xd->plane[p].subsampling_y), + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); + vpx_memcpy(a3 + 4 * p, + cm->above_context[p] + + ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x), + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); + } + + for (k = 0; k < 4; k++) { + xd->b_index = k; + + // try 8x8 coding + pick_sb_modes(cpi, mi_row + y_idx_m + (k >> 1), + mi_col + x_idx_m + (k & 1), + tp, &r, &d, BLOCK_SIZE_SB8X8, + &x->sb8_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + mb16_rate += r; + mb16_dist += d; + update_state(cpi, &x->sb8_context[xd->sb_index][xd->mb_index] + [xd->b_index], + BLOCK_SIZE_SB8X8, 0); + encode_superblock(cpi, tp, + 0, mi_row + y_idx_m + (k >> 1), + mi_col + x_idx_m + (k & 1), + BLOCK_SIZE_SB8X8); + } + set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); + pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); + mb16_rate += x->partition_cost[pl][PARTITION_SPLIT]; + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(cm->left_context[p] + + (y_idx_m * 2 >> xd->plane[p].subsampling_y), + l3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); + vpx_memcpy(cm->above_context[p] + + ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x), + a3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); + } + + // try 8x16 coding + r2 = 0; + d2 = 0; + xd->b_index = 0; + pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m, + tp, &r, &d, BLOCK_SIZE_SB8X16, + &x->sb8x16_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + r2 += r; + d2 += d; + update_state(cpi, &x->sb8x16_context[xd->sb_index][xd->mb_index] + [xd->b_index], + BLOCK_SIZE_SB8X16, 0); + encode_superblock(cpi, tp, + 0, mi_row + y_idx_m, mi_col + x_idx_m, + BLOCK_SIZE_SB8X16); + xd->b_index = 1; + pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m + 1, + tp, &r, &d, BLOCK_SIZE_SB8X16, + &x->sb8x16_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + r2 += r; + d2 += d; + set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); + pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); + r2 += x->partition_cost[pl][PARTITION_VERT]; + if (RDCOST(x->rdmult, x->rddiv, r2, d2) < + RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) { + mb16_rate = r2; + mb16_dist = d2; + mb_partitioning[i][j] = BLOCK_SIZE_SB8X16; + } + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(cm->left_context[p] + + (y_idx_m * 2 >> xd->plane[p].subsampling_y), + l3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); + vpx_memcpy(cm->above_context[p] + + ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x), + a3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); + } + + // try 16x8 coding + r2 = 0; + d2 = 0; + xd->b_index = 0; + pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m, + tp, &r, &d, BLOCK_SIZE_SB16X8, + &x->sb16x8_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + r2 += r; + d2 += d; + update_state(cpi, &x->sb16x8_context[xd->sb_index][xd->mb_index] + [xd->b_index], + BLOCK_SIZE_SB16X8, 0); + encode_superblock(cpi, tp, + 0, mi_row + y_idx_m, mi_col + x_idx_m, + BLOCK_SIZE_SB16X8); + xd->b_index = 1; + pick_sb_modes(cpi, mi_row + y_idx_m + 1, mi_col + x_idx_m, + tp, &r, &d, BLOCK_SIZE_SB16X8, + &x->sb16x8_context[xd->sb_index][xd->mb_index] + [xd->b_index]); + r2 += r; + d2 += d; + set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); + pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); + r2 += x->partition_cost[pl][PARTITION_HORZ]; + if (RDCOST(x->rdmult, x->rddiv, r2, d2) < + RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) { + mb16_rate = r2; + mb16_dist = d2; + mb_partitioning[i][j] = BLOCK_SIZE_SB16X8; + } + for (p = 0; p < MAX_MB_PLANE; p++) { + vpx_memcpy(cm->left_context[p] + + (y_idx_m * 2 >> xd->plane[p].subsampling_y), + l3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_y); + vpx_memcpy(cm->above_context[p] + + ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x), + a3 + 4 * p, + sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); + } + + // try as 16x16 + pick_sb_modes(cpi, mi_row + y_idx_m, mi_col + x_idx_m, + tp, &r, &d, BLOCK_SIZE_MB16X16, + &x->mb_context[xd->sb_index][xd->mb_index]); + set_partition_seg_context(cpi, mi_row + y_idx_m, mi_col + x_idx_m); + pl = partition_plane_context(xd, BLOCK_SIZE_MB16X16); + r += x->partition_cost[pl][PARTITION_NONE]; + if (RDCOST(x->rdmult, x->rddiv, r, d) < + RDCOST(x->rdmult, x->rddiv, mb16_rate, mb16_dist)) { + mb16_rate = r; + mb16_dist = d; + mb_partitioning[i][j] = BLOCK_SIZE_MB16X16; + } + sb32_rate += mb16_rate; + sb32_dist += mb16_dist; // Dummy encode, do not do the tokenization -#if CONFIG_SB8X8 - update_state(cpi, &x->mb_context[xd->sb_index][xd->mb_index], - BLOCK_SIZE_MB16X16, 0); -#endif - encode_macroblock(cpi, tp, 0, mi_row + y_idx_m, - mi_col + x_idx_m); + encode_sb(cpi, tp, mi_row + y_idx_m, mi_col + x_idx_m, 0, + BLOCK_SIZE_MB16X16, mb_partitioning[i][j], NULL, NULL); } /* Restore L & A coding context to those in place on entry */ for (p = 0; p < MAX_MB_PLANE; p++) { vpx_memcpy(cm->left_context[p] + - (y_idx * 4 >> (CONFIG_SB8X8 + - xd->plane[p].subsampling_y)), + (y_idx * 2 >> xd->plane[p].subsampling_y), l2 + 8 * p, sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y); vpx_memcpy(cm->above_context[p] + - ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 + - xd->plane[p].subsampling_x)), + ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x), a2 + 8 * p, sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); } @@ -1033,14 +1126,14 @@ static void encode_sb_row(VP9_COMP *cpi, } // check 32x16 - if (mi_col + x_idx + (2 << CONFIG_SB8X8) <= cm->mi_cols) { + if (mi_col + x_idx + 4 <= cm->mi_cols) { int r, d; xd->mb_index = 0; pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx, tp, &r, &d, BLOCK_SIZE_SB32X16, &x->sb32x16_context[xd->sb_index][xd->mb_index]); - if (mi_row + y_idx + (1 << CONFIG_SB8X8) < cm->mi_rows) { + if (mi_row + y_idx + 2 < cm->mi_rows) { int r2, d2; update_state(cpi, &x->sb32x16_context[xd->sb_index][xd->mb_index], @@ -1049,7 +1142,7 @@ static void encode_sb_row(VP9_COMP *cpi, 0, mi_row + y_idx, mi_col + x_idx, BLOCK_SIZE_SB32X16); xd->mb_index = 1; - pick_sb_modes(cpi, mi_row + y_idx + (1 << CONFIG_SB8X8), + pick_sb_modes(cpi, mi_row + y_idx + 2, mi_col + x_idx, tp, &r2, &d2, BLOCK_SIZE_SB32X16, &x->sb32x16_context[xd->sb_index][xd->mb_index]); r += r2; @@ -1070,27 +1163,25 @@ static void encode_sb_row(VP9_COMP *cpi, for (p = 0; p < MAX_MB_PLANE; p++) { vpx_memcpy(cm->left_context[p] + - (y_idx * 4 >> (CONFIG_SB8X8 + - xd->plane[p].subsampling_y)), + (y_idx * 2 >> xd->plane[p].subsampling_y), l2 + 8 * p, sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y); vpx_memcpy(cm->above_context[p] + - ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 + - xd->plane[p].subsampling_x)), + ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x), a2 + 8 * p, sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); } } // check 16x32 - if (mi_row + y_idx + (2 << CONFIG_SB8X8) <= cm->mi_rows) { + if (mi_row + y_idx + 4 <= cm->mi_rows) { int r, d; xd->mb_index = 0; pick_sb_modes(cpi, mi_row + y_idx, mi_col + x_idx, tp, &r, &d, BLOCK_SIZE_SB16X32, &x->sb16x32_context[xd->sb_index][xd->mb_index]); - if (mi_col + x_idx + (1 << CONFIG_SB8X8) < cm->mi_cols) { + if (mi_col + x_idx + 2 < cm->mi_cols) { int r2, d2; update_state(cpi, &x->sb16x32_context[xd->sb_index][xd->mb_index], @@ -1100,7 +1191,7 @@ static void encode_sb_row(VP9_COMP *cpi, BLOCK_SIZE_SB16X32); xd->mb_index = 1; pick_sb_modes(cpi, mi_row + y_idx, - mi_col + x_idx + (1 << CONFIG_SB8X8), + mi_col + x_idx + 2, tp, &r2, &d2, BLOCK_SIZE_SB16X32, &x->sb16x32_context[xd->sb_index][xd->mb_index]); r += r2; @@ -1121,21 +1212,19 @@ static void encode_sb_row(VP9_COMP *cpi, for (p = 0; p < MAX_MB_PLANE; p++) { vpx_memcpy(cm->left_context[p] + - (y_idx * 4 >> (CONFIG_SB8X8 + - xd->plane[p].subsampling_y)), + (y_idx * 2 >> xd->plane[p].subsampling_y), l2 + 8 * p, sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_y); vpx_memcpy(cm->above_context[p] + - ((mi_col + x_idx) * 4 >> (CONFIG_SB8X8 + - xd->plane[p].subsampling_x)), + ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x), a2 + 8 * p, sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); } } if (!sb32_skip && - mi_col + x_idx + (2 << CONFIG_SB8X8) <= cm->mi_cols && - mi_row + y_idx + (2 << CONFIG_SB8X8) <= cm->mi_rows) { + mi_col + x_idx + 4 <= cm->mi_cols && + mi_row + y_idx + 4 <= cm->mi_rows) { int r, d; /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */ @@ -1170,18 +1259,19 @@ static void encode_sb_row(VP9_COMP *cpi, // instead of small->big) means we can use as threshold for small, which // may enable breakouts if RD is not good enough (i.e. faster) encode_sb(cpi, tp, mi_row + y_idx, mi_col + x_idx, 0, - BLOCK_SIZE_SB32X32, sb_partitioning[i], sb_partitioning); + BLOCK_SIZE_SB32X32, sb_partitioning[i], mb_partitioning[i], + NULL); } for (p = 0; p < MAX_MB_PLANE; p++) { memcpy(cm->above_context[p] + - (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)), + (mi_col * 2 >> xd->plane[p].subsampling_x), a + 16 * p, sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x); memcpy(cm->left_context[p], l + 16 * p, sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y); } - memcpy(cm->above_seg_context + (mi_col >> CONFIG_SB8X8), &seg_a, + memcpy(cm->above_seg_context + (mi_col >> 1), &seg_a, sizeof(seg_a)); memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l)); @@ -1190,14 +1280,14 @@ static void encode_sb_row(VP9_COMP *cpi, sb64_rate += x->partition_cost[pl][PARTITION_SPLIT]; // check 64x32 - if (mi_col + (4 << CONFIG_SB8X8) <= cm->mi_cols && !(cm->mb_rows & 1)) { + if (mi_col + 8 <= cm->mi_cols && !(cm->mb_rows & 1)) { int r, d; xd->sb_index = 0; pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, BLOCK_SIZE_SB64X32, &x->sb64x32_context[xd->sb_index]); - if (mi_row + (2 << CONFIG_SB8X8) != cm->mi_rows) { + if (mi_row + 4 != cm->mi_rows) { int r2, d2; update_state(cpi, &x->sb64x32_context[xd->sb_index], @@ -1205,7 +1295,7 @@ static void encode_sb_row(VP9_COMP *cpi, encode_superblock(cpi, tp, 0, mi_row, mi_col, BLOCK_SIZE_SB64X32); xd->sb_index = 1; - pick_sb_modes(cpi, mi_row + (2 << CONFIG_SB8X8), mi_col, + pick_sb_modes(cpi, mi_row + 4, mi_col, tp, &r2, &d2, BLOCK_SIZE_SB64X32, &x->sb64x32_context[xd->sb_index]); r += r2; @@ -1221,12 +1311,12 @@ static void encode_sb_row(VP9_COMP *cpi, RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { sb64_rate = r; sb64_dist = d; - sb_partitioning[0] = BLOCK_SIZE_SB64X32; + sb64_partitioning = BLOCK_SIZE_SB64X32; } for (p = 0; p < MAX_MB_PLANE; p++) { memcpy(cm->above_context[p] + - (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)), + (mi_col * 2 >> xd->plane[p].subsampling_x), a + 16 * p, sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x); memcpy(cm->left_context[p], l + 16 * p, @@ -1235,14 +1325,14 @@ static void encode_sb_row(VP9_COMP *cpi, } // check 32x64 - if (mi_row + (4 << CONFIG_SB8X8) <= cm->mi_rows && !(cm->mb_cols & 1)) { + if (mi_row + 8 <= cm->mi_rows && !(cm->mb_cols & 1)) { int r, d; xd->sb_index = 0; pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, BLOCK_SIZE_SB32X64, &x->sb32x64_context[xd->sb_index]); - if (mi_col + (2 << CONFIG_SB8X8) != cm->mi_cols) { + if (mi_col + 4 != cm->mi_cols) { int r2, d2; update_state(cpi, &x->sb32x64_context[xd->sb_index], @@ -1250,7 +1340,7 @@ static void encode_sb_row(VP9_COMP *cpi, encode_superblock(cpi, tp, 0, mi_row, mi_col, BLOCK_SIZE_SB32X64); xd->sb_index = 1; - pick_sb_modes(cpi, mi_row, mi_col + (2 << CONFIG_SB8X8), + pick_sb_modes(cpi, mi_row, mi_col + 4, tp, &r2, &d2, BLOCK_SIZE_SB32X64, &x->sb32x64_context[xd->sb_index]); r += r2; @@ -1266,12 +1356,12 @@ static void encode_sb_row(VP9_COMP *cpi, RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { sb64_rate = r; sb64_dist = d; - sb_partitioning[0] = BLOCK_SIZE_SB32X64; + sb64_partitioning = BLOCK_SIZE_SB32X64; } for (p = 0; p < MAX_MB_PLANE; p++) { memcpy(cm->above_context[p] + - (mi_col * 4 >> (CONFIG_SB8X8 + xd->plane[p].subsampling_x)), + (mi_col * 2 >> xd->plane[p].subsampling_x), a + 16 * p, sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_x); memcpy(cm->left_context[p], l + 16 * p, @@ -1280,8 +1370,8 @@ static void encode_sb_row(VP9_COMP *cpi, } if (!sb64_skip && - mi_col + (4 << CONFIG_SB8X8) <= cm->mi_cols && - mi_row + (4 << CONFIG_SB8X8) <= cm->mi_rows) { + mi_col + 8 <= cm->mi_cols && + mi_row + 8 <= cm->mi_rows) { int r, d; pick_sb_modes(cpi, mi_row, mi_col, tp, &r, &d, @@ -1295,13 +1385,13 @@ static void encode_sb_row(VP9_COMP *cpi, RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist)) { sb64_rate = r; sb64_dist = d; - sb_partitioning[0] = BLOCK_SIZE_SB64X64; + sb64_partitioning = BLOCK_SIZE_SB64X64; } } assert(tp_orig == *tp); - encode_sb(cpi, tp, mi_row, mi_col, 1, - BLOCK_SIZE_SB64X64, sb_partitioning[0], sb_partitioning); + encode_sb(cpi, tp, mi_row, mi_col, 1, BLOCK_SIZE_SB64X64, + sb64_partitioning, sb_partitioning, mb_partitioning); assert(tp_orig < *tp); } } @@ -1333,9 +1423,6 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { 0, 0, NULL, NULL); setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0); - // set up frame for intra coded blocks - vp9_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); - vp9_build_block_offsets(x); vp9_setup_block_dptrs(&x->e_mbd); @@ -1346,10 +1433,8 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_zero(cpi->count_mb_ref_frame_usage) vp9_zero(cpi->bmode_count) vp9_zero(cpi->ymode_count) - vp9_zero(cpi->i8x8_mode_count) vp9_zero(cpi->y_uv_mode_count) vp9_zero(cpi->sub_mv_ref_count) - vp9_zero(cpi->mbsplit_count) vp9_zero(cpi->common.fc.mv_ref_ct) vp9_zero(cpi->sb_ymode_count) vp9_zero(cpi->partition_count); @@ -1487,7 +1572,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_get_tile_col_offsets(cm, tile_col); for (mi_row = cm->cur_tile_mi_row_start; mi_row < cm->cur_tile_mi_row_end; - mi_row += (4 << CONFIG_SB8X8)) { + mi_row += 8) { encode_sb_row(cpi, mi_row, &tp, &totalrate); } cpi->tok_count[tile_col] = (unsigned int)(tp - tp_old); @@ -1616,9 +1701,11 @@ static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi, assert(bwl < bsl && bhl < bsl); if (bsize == BLOCK_SIZE_SB64X64) { subsize = BLOCK_SIZE_SB32X32; - } else { - assert(bsize == BLOCK_SIZE_SB32X32); + } else if (bsize == BLOCK_SIZE_SB32X32) { subsize = BLOCK_SIZE_MB16X16; + } else { + assert(bsize == BLOCK_SIZE_MB16X16); + subsize = BLOCK_SIZE_SB8X8; } for (n = 0; n < 4; n++) { @@ -1638,10 +1725,10 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { MODE_INFO *mi, *mi_ptr = cm->mi; for (mi_row = 0; mi_row < cm->mi_rows; - mi_row += (4 << CONFIG_SB8X8), mi_ptr += (4 << CONFIG_SB8X8) * mis) { + mi_row += 8, mi_ptr += 8 * mis) { mi = mi_ptr; for (mi_col = 0; mi_col < cm->mi_cols; - mi_col += (4 << CONFIG_SB8X8), mi += (4 << CONFIG_SB8X8)) { + mi_col += 8, mi += 8) { reset_skip_txfm_size_sb(cpi, mi, txfm_max, mi_row, mi_col, BLOCK_SIZE_SB64X64); } @@ -1823,30 +1910,16 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { do { ++ bct[xd->block[b].bmi.as_mode.first]; - } while (++b < 16); - } - - if (m == I8X8_PRED) { - i8x8_modes[xd->block[0].bmi.as_mode.first]++; - i8x8_modes[xd->block[2].bmi.as_mode.first]++; - i8x8_modes[xd->block[8].bmi.as_mode.first]++; - i8x8_modes[xd->block[10].bmi.as_mode.first]++; + } while (++b < 4); } #endif - if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_MB16X16) { + if (xd->mode_info_context->mbmi.sb_type > BLOCK_SIZE_SB8X8) { ++cpi->sb_ymode_count[m]; } else { ++cpi->ymode_count[m]; } - if (m != I8X8_PRED) ++cpi->y_uv_mode_count[m][uvm]; - else { - cpi->i8x8_mode_count[xd->mode_info_context->bmi[0].as_mode.first]++; - cpi->i8x8_mode_count[xd->mode_info_context->bmi[2].as_mode.first]++; - cpi->i8x8_mode_count[xd->mode_info_context->bmi[8].as_mode.first]++; - cpi->i8x8_mode_count[xd->mode_info_context->bmi[10].as_mode.first]++; - } if (m == I4X4_PRED) { int b = 0; do { @@ -1855,7 +1928,7 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { if (m == B_CONTEXT_PRED) m -= CONTEXT_PRED_REPLACEMENTS; #endif ++cpi->bmode_count[m]; - } while (++b < 16); + } while (++b < 4); } } @@ -1880,254 +1953,6 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) { #endif } -static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, - int output_enabled, - int mi_row, int mi_col) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; - MACROBLOCKD *const xd = &x->e_mbd; - MODE_INFO *mi = xd->mode_info_context; - MB_MODE_INFO *const mbmi = &mi->mbmi; - const int mis = cm->mode_info_stride; -#if CONFIG_SB8X8 - int n; -#endif - - assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_MB16X16); - -#ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame && - mb_row == 8 && mb_col == 0 && output_enabled); - if (enc_debug) - printf("Encode MB %d %d output %d\n", mb_row, mb_col, output_enabled); -#endif - if (cm->frame_type == KEY_FRAME) { - if (cpi->oxcf.tuning == VP8_TUNE_SSIM && output_enabled) { - // Adjust the zbin based on this MB rate. - adjust_act_zbin(cpi, x); - vp9_update_zbin_extra(cpi, x); - } - } else { - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { - // Adjust the zbin based on this MB rate. - adjust_act_zbin(cpi, x); - } - - // Experimental code. Special case for gf and arf zeromv modes. - // Increase zbin size to suppress noise - cpi->zbin_mode_boost = 0; - if (cpi->zbin_mode_boost_enabled) { - if (mbmi->ref_frame != INTRA_FRAME) { - if (mbmi->mode == ZEROMV) { - if (mbmi->ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } else if (mbmi->mode == SPLITMV) - cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = MV_ZBIN_BOOST; - } else { - cpi->zbin_mode_boost = INTRA_ZBIN_BOOST; - } - } - - vp9_update_zbin_extra(cpi, x); - } - - if (mbmi->ref_frame == INTRA_FRAME) { -#if 0 // def ENC_DEBUG - if (enc_debug) { - printf("Mode %d skip %d tx_size %d\n", mbmi->mode, x->skip, - mbmi->txfm_size); - } -#endif - if (mbmi->mode == I4X4_PRED) { - vp9_encode_intra16x16mbuv(cm, x); - vp9_encode_intra4x4mby(x, BLOCK_SIZE_MB16X16); - } else if (mbmi->mode == I8X8_PRED) { - vp9_encode_intra8x8mby(x); - vp9_encode_intra8x8mbuv(x); - } else { - vp9_encode_intra16x16mbuv(cm, x); - vp9_encode_intra16x16mby(cm, x); - } - - if (output_enabled) - sum_intra_stats(cpi, x); - } else { - int ref_fb_idx, second_ref_fb_idx; -#ifdef ENC_DEBUG - if (enc_debug) - printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n", - mbmi->mode, x->skip, mbmi->txfm_size, - mbmi->ref_frame, mbmi->second_ref_frame, - mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col, - mbmi->interp_filter); -#endif - - assert(cm->frame_type != KEY_FRAME); - - if (mbmi->ref_frame == LAST_FRAME) - ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; - else if (mbmi->ref_frame == GOLDEN_FRAME) - ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; - else - ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; - - if (mbmi->second_ref_frame > 0) { - if (mbmi->second_ref_frame == LAST_FRAME) - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx]; - else if (mbmi->second_ref_frame == GOLDEN_FRAME) - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx]; - else - second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx]; - } - - setup_pre_planes(xd, - &cpi->common.yv12_fb[ref_fb_idx], - mbmi->second_ref_frame > 0 ? &cpi->common.yv12_fb[second_ref_fb_idx] - : NULL, - mi_row, mi_col, xd->scale_factor, xd->scale_factor_uv); - - if (!x->skip) { - vp9_encode_inter16x16(cm, x, mi_row, mi_col); - } else { - vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16); -#if CONFIG_COMP_INTERINTRA_PRED - if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_predictors(xd, - xd->plane[0].dst.buf, - xd->plane[1].dst.buf, - xd->plane[2].dst.buf, - xd->plane[0].dst.stride, - xd->plane[1].dst.stride, - BLOCK_SIZE_MB16X16); - } -#endif - } - } - - if (!x->skip) { -#ifdef ENC_DEBUG - if (enc_debug) { - int i, j; - printf("\n"); - printf("qcoeff\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", xd->qcoeff[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("predictor\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", xd->predictor[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("src_diff\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", x->src_diff[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("diff\n"); - for (i = 0; i < 384; i++) { - printf("%3d ", xd->block[0].diff[i]); - if (i % 16 == 15) printf("\n"); - } - printf("\n"); - printf("final y\n"); - for (i = 0; i < 16; i++) { - for (j = 0; j < 16; j++) - printf("%3d ", xd->plane[0].dst.buf[i * xd->plane[0].dst.stride + j]); - printf("\n"); - } - printf("\n"); - printf("final u\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) - printf("%3d ", xd->plane[1].dst.buf[i * xd->plane[1].dst.stride + j]); - printf("\n"); - } - printf("\n"); - printf("final v\n"); - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) - printf("%3d ", xd->plane[2].dst.buf[i * xd->plane[1].dst.stride + j]); - printf("\n"); - } - fflush(stdout); - } -#endif - - vp9_tokenize_sb(cpi, xd, t, !output_enabled, BLOCK_SIZE_MB16X16); - } else { - // FIXME(rbultje): not tile-aware (mi - 1) - int mb_skip_context = - (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff; - - mbmi->mb_skip_coeff = 1; - if (output_enabled) - cpi->skip_true_count[mb_skip_context]++; - vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_MB16X16); - } - -#if CONFIG_SB8X8 - // copy skip flag on all mb_mode_info contexts in this SB - // if this was a skip at this txfm size - for (n = 1; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - if (mi_col + x_idx < cm->mi_cols && mi_row + y_idx < cm->mi_rows) - mi[x_idx + y_idx * mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff; - } -#endif - - if (output_enabled) { - int segment_id = mbmi->segment_id; - if (cpi->common.txfm_mode == TX_MODE_SELECT && - !(mbmi->mb_skip_coeff || - vp9_segfeature_active(&x->e_mbd, segment_id, SEG_LVL_SKIP))) { - assert(mbmi->txfm_size <= TX_16X16); - if (mbmi->mode != I4X4_PRED && mbmi->mode != I8X8_PRED && - mbmi->mode != SPLITMV) { - cpi->txfm_count_16x16p[mbmi->txfm_size]++; - } else if (mbmi->mode == I8X8_PRED || - (mbmi->mode == SPLITMV && - mbmi->partitioning != PARTITIONING_4X4)) { - cpi->txfm_count_8x8p[mbmi->txfm_size]++; - } - } else { -#if CONFIG_SB8X8 - int y, x; -#endif - if (mbmi->mode != I4X4_PRED && mbmi->mode != I8X8_PRED && - mbmi->mode != SPLITMV && cpi->common.txfm_mode >= ALLOW_16X16) { - mbmi->txfm_size = TX_16X16; - } else if (mbmi->mode != I4X4_PRED && - !(mbmi->mode == SPLITMV && - mbmi->partitioning == PARTITIONING_4X4) && - cpi->common.txfm_mode >= ALLOW_8X8) { - mbmi->txfm_size = TX_8X8; - } else { - mbmi->txfm_size = TX_4X4; - } - -#if CONFIG_SB8X8 - for (y = 0; y < 2; y++) { - for (x = !y; x < 2; x++) { - if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) { - mi[mis * y + x].mbmi.txfm_size = mbmi->txfm_size; - } - } - } -#endif - } - } -} - static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE_TYPE bsize) { @@ -2177,7 +2002,17 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_update_zbin_extra(cpi, x); } - if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { + if (xd->mode_info_context->mbmi.mode == I4X4_PRED) { + assert(bsize == BLOCK_SIZE_SB8X8 && + xd->mode_info_context->mbmi.txfm_size == TX_4X4); + + vp9_encode_intra4x4mby(x, bsize); + vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize); + vp9_encode_sbuv(cm, x, bsize); + + if (output_enabled) + sum_intra_stats(cpi, x); + } else if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { vp9_build_intra_predictors_sby_s(&x->e_mbd, bsize); vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize); if (output_enabled) @@ -2212,83 +2047,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); } - if (!x->skip) { - vp9_subtract_sb(x, bsize); - - switch (xd->mode_info_context->mbmi.txfm_size) { - case TX_32X32: - vp9_transform_sby_32x32(x, bsize); - vp9_quantize_sby_32x32(x, bsize); - if (bsize == BLOCK_SIZE_SB64X64) { - vp9_transform_sbuv_32x32(x, bsize); - vp9_quantize_sbuv_32x32(x, bsize); - } else { - vp9_transform_sbuv_16x16(x, bsize); - vp9_quantize_sbuv_16x16(x, bsize); - } - if (x->optimize) { - vp9_optimize_sby_32x32(cm, x, bsize); - if (bsize == BLOCK_SIZE_SB64X64) - vp9_optimize_sbuv_32x32(cm, x, bsize); - else - vp9_optimize_sbuv_16x16(cm, x, bsize); - } - vp9_inverse_transform_sby_32x32(xd, bsize); - if (bsize == BLOCK_SIZE_SB64X64) - vp9_inverse_transform_sbuv_32x32(xd, bsize); - else - vp9_inverse_transform_sbuv_16x16(xd, bsize); - break; - case TX_16X16: - vp9_transform_sby_16x16(x, bsize); - vp9_quantize_sby_16x16(x, bsize); - if (bsize >= BLOCK_SIZE_SB32X32) { - vp9_transform_sbuv_16x16(x, bsize); - vp9_quantize_sbuv_16x16(x, bsize); - } else { - vp9_transform_sbuv_8x8(x, bsize); - vp9_quantize_sbuv_8x8(x, bsize); - } - if (x->optimize) { - vp9_optimize_sby_16x16(cm, x, bsize); - if (bsize >= BLOCK_SIZE_SB32X32) - vp9_optimize_sbuv_16x16(cm, x, bsize); - else - vp9_optimize_sbuv_8x8(cm, x, bsize); - } - vp9_inverse_transform_sby_16x16(xd, bsize); - if (bsize >= BLOCK_SIZE_SB32X32) - vp9_inverse_transform_sbuv_16x16(xd, bsize); - else - vp9_inverse_transform_sbuv_8x8(xd, bsize); - break; - case TX_8X8: - vp9_transform_sby_8x8(x, bsize); - vp9_transform_sbuv_8x8(x, bsize); - vp9_quantize_sby_8x8(x, bsize); - vp9_quantize_sbuv_8x8(x, bsize); - if (x->optimize) { - vp9_optimize_sby_8x8(cm, x, bsize); - vp9_optimize_sbuv_8x8(cm, x, bsize); - } - vp9_inverse_transform_sby_8x8(xd, bsize); - vp9_inverse_transform_sbuv_8x8(xd, bsize); - break; - case TX_4X4: - vp9_transform_sby_4x4(x, bsize); - vp9_transform_sbuv_4x4(x, bsize); - vp9_quantize_sby_4x4(x, bsize); - vp9_quantize_sbuv_4x4(x, bsize); - if (x->optimize) { - vp9_optimize_sby_4x4(cm, x, bsize); - vp9_optimize_sbuv_4x4(cm, x, bsize); - } - vp9_inverse_transform_sby_4x4(xd, bsize); - vp9_inverse_transform_sbuv_4x4(xd, bsize); - break; - default: assert(0); - } - vp9_recon_sb_c(xd, bsize); + if (xd->mode_info_context->mbmi.mode == I4X4_PRED) { + assert(bsize == BLOCK_SIZE_SB8X8); + vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize); + } else if (!x->skip) { + vp9_encode_sb(cm, x, bsize); vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize); } else { // FIXME(rbultje): not tile-aware (mi - 1) @@ -2315,8 +2078,10 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP))) { if (bsize >= BLOCK_SIZE_SB32X32) { cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; - } else { + } else if (bsize >= BLOCK_SIZE_MB16X16) { cpi->txfm_count_16x16p[mi->mbmi.txfm_size]++; + } else { + cpi->txfm_count_8x8p[mi->mbmi.txfm_size]++; } } else { int x, y; @@ -2324,6 +2089,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32) sz = TX_16X16; + if (sz == TX_16X16 && bsize < BLOCK_SIZE_MB16X16) + sz = TX_8X8; + if (sz == TX_8X8 && (xd->mode_info_context->mbmi.mode == SPLITMV || + xd->mode_info_context->mbmi.mode == I4X4_PRED)) + sz = TX_4X4; for (y = 0; y < bh; y++) { for (x = 0; x < bw; x++) { diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index f6ddca8f4..72a6603f8 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -57,36 +57,37 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib, raster_block_offset_int16(xd, bsize, 0, ib, xd->plane[0].diff); int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16); + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - assert(ib < 16); + assert(ib < (1 << (bwl + bhl))); #if CONFIG_NEWBINTRAMODES xd->mode_info_context->bmi[ib].as_mode.context = vp9_find_bpred_context(&x->e_mbd, ib, dst, xd->plane[0].dst.stride); #endif - vp9_intra4x4_predict(&x->e_mbd, ib, + vp9_intra4x4_predict(&x->e_mbd, ib, bsize, xd->mode_info_context->bmi[ib].as_mode.first, dst, xd->plane[0].dst.stride); - vp9_subtract_block(4, 4, src_diff, 16, + vp9_subtract_block(4, 4, src_diff, 4 << bwl, src, x->plane[0].src.stride, dst, xd->plane[0].dst.stride); tx_type = get_tx_type_4x4(&x->e_mbd, ib); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(src_diff, coeff, 16, tx_type); + vp9_short_fht4x4(src_diff, coeff, 4 << bwl, tx_type); x->quantize_b_4x4(x, ib, tx_type, 16); vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), - diff, 16, tx_type); + diff, 4 << bwl, tx_type); } else { - x->fwd_txm4x4(src_diff, coeff, 32); + x->fwd_txm4x4(src_diff, coeff, 8 << bwl); x->quantize_b_4x4(x, ib, tx_type, 16); vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[0].eobs[ib], BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), - diff, 32); + diff, 8 << bwl); } - vp9_recon_b(dst, diff, dst, xd->plane[0].dst.stride); + vp9_recon_b(dst, diff, 4 << bwl, dst, xd->plane[0].dst.stride); } void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bsize) { @@ -100,207 +101,14 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bsize) { void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; vp9_build_intra_predictors_sby_s(xd, BLOCK_SIZE_MB16X16); - vp9_subtract_sby(x, BLOCK_SIZE_MB16X16); - - switch (tx_size) { - case TX_16X16: - vp9_transform_sby_16x16(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sby_16x16(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16); - break; - case TX_8X8: - vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sby_8x8(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16); - break; - default: - vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sby_4x4(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); - break; - } - - vp9_recon_sby(xd, BLOCK_SIZE_MB16X16); + vp9_encode_sby(cm, x, BLOCK_SIZE_MB16X16); } void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16); - vp9_subtract_sbuv(x, BLOCK_SIZE_MB16X16); - - switch (tx_size) { - case TX_4X4: - vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); - break; - default: // 16x16 or 8x8 - vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); - break; - } - - vp9_recon_sbuv(xd, BLOCK_SIZE_MB16X16); -} - -void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { - MACROBLOCKD *xd = &x->e_mbd; - uint8_t* const src = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, - x->plane[0].src.buf, x->plane[0].src.stride); - int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib, - x->plane[0].src_diff); - int16_t* const diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib, - xd->plane[0].diff); - uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); - const int iblock[4] = {0, 1, 4, 5}; - int i; - TX_TYPE tx_type; - - vp9_intra8x8_predict(xd, ib, xd->mode_info_context->bmi[ib].as_mode.first, - dst, xd->plane[0].dst.stride); - // generate residual blocks - vp9_subtract_block(8, 8, src_diff, 16, - src, x->plane[0].src.stride, - dst, xd->plane[0].dst.stride); - - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - int idx = (ib & 0x02) ? (ib + 2) : ib; - int16_t* const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16); - - assert(idx < 16); - tx_type = get_tx_type_8x8(xd, ib); - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(src_diff, coeff, 16, tx_type); - x->quantize_b_8x8(x, idx, tx_type, 16); - vp9_short_iht8x8(dqcoeff, diff, 16, tx_type); - } else { - x->fwd_txm8x8(src_diff, coeff, 32); - x->quantize_b_8x8(x, idx, DCT_DCT, 16); - vp9_short_idct8x8(dqcoeff, diff, 32); - } - } else { - for (i = 0; i < 4; i++) { - int idx = ib + iblock[i]; - int16_t* const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16); - int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, idx, - x->plane[0].src_diff); - int16_t* const diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, idx, - xd->plane[0].diff); - - assert(idx < 16); - tx_type = get_tx_type_4x4(xd, ib + iblock[i]); - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(src_diff, coeff, 16, tx_type); - x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16); - vp9_short_iht4x4(dqcoeff, diff, 16, tx_type); - } else if (!(i & 1) && - get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) { - x->fwd_txm8x4(src_diff, coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16); - vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]], - dqcoeff, diff, 32); - vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i] + 1], - dqcoeff + 16, diff + 4, 32); - i++; - } else { - x->fwd_txm4x4(src_diff, coeff, 32); - x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16); - vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[ib + iblock[i]], - dqcoeff, diff, 32); - } - } - } - - // reconstruct submacroblock - for (i = 0; i < 4; i++) { - int16_t* const diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib + iblock[i], - xd->plane[0].diff); - uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib + iblock[i], - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); - vp9_recon_b_c(dst, diff, dst, xd->plane[0].dst.stride); - } -} - -void vp9_encode_intra8x8mby(MACROBLOCK *x) { - int i; - - for (i = 0; i < 4; i++) - vp9_encode_intra8x8(x, vp9_i8x8_block[i]); -} - -static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { - MACROBLOCKD * const xd = &x->e_mbd; - int16_t * const dqcoeff = MB_SUBBLOCK_FIELD(xd, dqcoeff, ib); - int16_t* const coeff = MB_SUBBLOCK_FIELD(x, coeff, ib); - const int plane = ib < 20 ? 1 : 2; - const int block = ib < 20 ? ib - 16 : ib - 20; - uint8_t* const src = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, plane, block, - x->plane[plane].src.buf, - x->plane[plane].src.stride); - int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, plane, block, - x->plane[plane].src_diff); - int16_t* const diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, plane, block, - xd->plane[plane].diff); - uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, plane, block, - xd->plane[plane].dst.buf, - xd->plane[plane].dst.stride); - - assert(ib >= 16 && ib < 24); - vp9_intra_uv4x4_predict(&x->e_mbd, ib, mode, - dst, xd->plane[plane].dst.stride); - - assert(xd->plane[1].subsampling_x == 1); - vp9_subtract_block(4, 4, src_diff, 8, - src, x->plane[plane].src.stride, - dst, xd->plane[plane].dst.stride); - - x->fwd_txm4x4(src_diff, coeff, 16); - x->quantize_b_4x4(x, ib, DCT_DCT, 16); - vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[plane].eobs[block], - dqcoeff, diff, 16); - - vp9_recon_uv_b_c(dst, diff, dst, xd->plane[plane].dst.stride); -} - -void vp9_encode_intra8x8mbuv(MACROBLOCK *x) { - int i; - - for (i = 0; i < 4; i++) { - int mode = x->e_mbd.mode_info_context->bmi[vp9_i8x8_block[i]].as_mode.first; - - encode_intra_uv4x4(x, i + 16, mode); // u - encode_intra_uv4x4(x, i + 20, mode); // v - } + vp9_encode_sbuv(cm, x, BLOCK_SIZE_MB16X16); } diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h index 7ec2f11d4..c26200494 100644 --- a/vp9/encoder/vp9_encodeintra.h +++ b/vp9/encoder/vp9_encodeintra.h @@ -17,8 +17,4 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred); void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bs); -void vp9_encode_intra8x8mby(MACROBLOCK *x); -void vp9_encode_intra8x8mbuv(MACROBLOCK *x); -void vp9_encode_intra8x8(MACROBLOCK *x, int ib); - #endif // VP9_ENCODER_VP9_ENCODEINTRA_H_ diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 0cb1ae958..421052753 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -67,143 +67,6 @@ void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { } -void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - const int stride = 32 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - vp9_short_fdct32x32(x->plane[0].src_diff + y_idx * stride * 32 + x_idx * 32, - x->plane[0].coeff + n * 1024, stride * 2); - } -} - -void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - const int stride = 16 << bwl, bstride = 4 << bwl; - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const TX_TYPE tx_type = get_tx_type_16x16(xd, - (y_idx * bstride + x_idx) * 4); - - if (tx_type != DCT_DCT) { - vp9_short_fht16x16(x->plane[0].src_diff + - y_idx * stride * 16 + x_idx * 16, - x->plane[0].coeff + n * 256, stride, tx_type); - } else { - x->fwd_txm16x16(x->plane[0].src_diff + y_idx * stride * 16 + x_idx * 16, - x->plane[0].coeff + n * 256, stride * 2); - } - } -} - -void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - const int stride = 8 << bwl, bstride = 2 << bwl; - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2); - - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8, - x->plane[0].coeff + n * 64, stride, tx_type); - } else { - x->fwd_txm8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8, - x->plane[0].coeff + n * 64, stride * 2); - } - } -} - -void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bw = 1 << bwl; - const int bh = 1 << b_height_log2(bsize); - const int stride = 4 << bwl; - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const TX_TYPE tx_type = get_tx_type_4x4(xd, n); - - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4, - x->plane[0].coeff + n * 16, stride, tx_type); - } else { - x->fwd_txm4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4, - x->plane[0].coeff + n * 16, stride * 2); - } - } -} - -void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - assert(bsize == BLOCK_SIZE_SB64X64); - vp9_clear_system_state(); - vp9_short_fdct32x32(x->plane[1].src_diff, x->plane[1].coeff, 64); - vp9_short_fdct32x32(x->plane[2].src_diff, x->plane[2].coeff, 64); -} - -void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2; - const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); - const int stride = 16 << (bwl - 1); - int n; - - vp9_clear_system_state(); - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - - x->fwd_txm16x16(x->plane[1].src_diff + y_idx * stride * 16 + x_idx * 16, - x->plane[1].coeff + n * 256, stride * 2); - x->fwd_txm16x16(x->plane[2].src_diff + y_idx * stride * 16 + x_idx * 16, - x->plane[2].coeff + n * 256, stride * 2); - } -} - -void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1; - const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); - const int stride = 8 << (bwl - 1); - int n; - - vp9_clear_system_state(); - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - - x->fwd_txm8x8(x->plane[1].src_diff + y_idx * stride * 8 + x_idx * 8, - x->plane[1].coeff + n * 64, stride * 2); - x->fwd_txm8x8(x->plane[2].src_diff + y_idx * stride * 8 + x_idx * 8, - x->plane[2].coeff + n * 64, stride * 2); - } -} - -void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); - const int stride = 4 << (bwl - 1); - int n; - - vp9_clear_system_state(); - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - - x->fwd_txm4x4(x->plane[1].src_diff + y_idx * stride * 4 + x_idx * 4, - x->plane[1].coeff + n * 16, stride * 2); - x->fwd_txm4x4(x->plane[2].src_diff + y_idx * stride * 4 + x_idx * 4, - x->plane[2].coeff + n * 16, stride * 2); - } -} - #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) #define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) typedef struct vp9_token_state vp9_token_state; @@ -249,57 +112,53 @@ static int trellis_get_coeff_context(const int *scan, return pt; } -static void optimize_b(VP9_COMMON *const cm, - MACROBLOCK *mb, int ib, PLANE_TYPE type, - const int16_t *dequant_ptr, +static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, + int plane, int block, BLOCK_SIZE_TYPE bsize, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int tx_size, int y_blocks) { + TX_SIZE tx_size) { const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME; MACROBLOCKD *const xd = &mb->e_mbd; vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib); - const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, - pb_idx.block, 16); + const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, + block, 16); int16_t *qcoeff_ptr; int16_t *dqcoeff_ptr; - int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block], final_eob, sz = 0; + int eob = xd->plane[plane].eobs[block], final_eob, sz = 0; const int i0 = 0; int rc, x, next, i; int64_t rdmult, rddiv, rd_cost0, rd_cost1; int rate0, rate1, error0, error1, t0, t1; int best, band, pt; + PLANE_TYPE type = xd->plane[plane].plane_type; int err_mult = plane_rd_mult[type]; int default_eob, pad; int const *scan, *nb; const int mul = 1 + (tx_size == TX_32X32); uint8_t token_cache[1024]; + const int ib = txfrm_block_to_raster_block(xd, bsize, plane, + block, 2 * tx_size); + const int16_t *dequant_ptr = xd->plane[plane].dequant; - assert((!type && !pb_idx.plane) || (type && pb_idx.plane)); - dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16); - qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16); + assert((!type && !plane) || (type && plane)); + dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16); + qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); switch (tx_size) { default: case TX_4X4: { - const TX_TYPE tx_type = get_tx_type_4x4(xd, ib); + const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT; default_eob = 16; scan = get_scan_4x4(tx_type); break; } case TX_8X8: { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 1 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = get_tx_type_8x8(xd, y + (x >> 1)); + const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT; scan = get_scan_8x8(tx_type); default_eob = 64; break; } case TX_16X16: { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 2 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = get_tx_type_16x16(xd, y + (x >> 2)); + const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT; scan = get_scan_16x16(tx_type); default_eob = 256; break; @@ -480,272 +339,268 @@ static void optimize_b(VP9_COMMON *const cm, } final_eob++; - xd->plane[pb_idx.plane].eobs[pb_idx.block] = final_eob; + xd->plane[plane].eobs[block] = final_eob; *a = *l = (final_eob > 0); } -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT *a = xd->plane[0].above_context; - ENTROPY_CONTEXT *l = xd->plane[0].left_context; - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - ENTROPY_CONTEXT ta[2], tl[2]; - int n; - - for (n = 0; n < bw; n++, a += 8) - ta[n] = (a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7]) != 0; - for (n = 0; n < bh; n++, l += 8) - tl[n] = (l[0] + l[1] + l[2] + l[3] + l[4] + l[5] + l[6] + l[7]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh); - } +struct optimize_block_args { + VP9_COMMON *cm; + MACROBLOCK *x; + struct optimize_ctx *ctx; +}; + +void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *mb, + struct optimize_ctx *ctx) { + MACROBLOCKD* const xd = &mb->e_mbd; + int x, y; + + // find current entropy context + txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y); + + optimize_b(cm, mb, plane, block, bsize, + &ctx->ta[plane][x], &ctx->tl[plane][y], + ss_txfrm_size / 2); } -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT *a = xd->plane[0].above_context; - ENTROPY_CONTEXT *l = xd->plane[0].left_context; - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - ENTROPY_CONTEXT ta[4], tl[4]; - int n; - - for (n = 0; n < bw; n++, a += 4) - ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0; - for (n = 0; n < bh; n++, l += 4) - tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh); - } +static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + const struct optimize_block_args* const args = arg; + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, args->x, + args->ctx); } -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT *a = xd->plane[0].above_context; - ENTROPY_CONTEXT *l = xd->plane[0].left_context; - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - ENTROPY_CONTEXT ta[8], tl[8]; - int n; - - for (n = 0; n < bw; n++, a += 2) - ta[n] = (a[0] + a[1]) != 0; - for (n = 0; n < bh; n++, l += 2) - tl[n] = (l[0] + l[1]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh); +void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + struct optimize_ctx *ctx) { + int p; + + for (p = 0; p < MAX_MB_PLANE; p++) { + const struct macroblockd_plane* const plane = &xd->plane[p]; + const int bwl = b_width_log2(bsize) - plane->subsampling_x; + const int bhl = b_height_log2(bsize) - plane->subsampling_y; + const TX_SIZE tx_size = tx_size_for_plane(xd, bsize, p); + int i, j; + + for (i = 0; i < 1 << bwl; i += 1 << tx_size) { + int c = 0; + ctx->ta[p][i] = 0; + for (j = 0; j < 1 << tx_size && !c; j++) { + c = ctx->ta[p][i] |= plane->above_context[i + j]; + } + } + for (i = 0; i < 1 << bhl; i += 1 << tx_size) { + int c = 0; + ctx->tl[p][i] = 0; + for (j = 0; j < 1 << tx_size && !c; j++) { + c = ctx->tl[p][i] |= plane->left_context[i + j]; + } + } } } -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - int bwl = b_width_log2(bsize), bw = 1 << bwl; - int bh = 1 << b_height_log2(bsize); - ENTROPY_CONTEXT ta[16], tl[16]; - int n; +void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + struct optimize_ctx ctx; + struct optimize_block_args arg = {cm, x, &ctx}; + vp9_optimize_init(&x->e_mbd, bsize, &ctx); + foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, + optimize_block, &arg); +} - vpx_memcpy(ta, xd->plane[0].above_context, sizeof(ENTROPY_CONTEXT) * bw); - vpx_memcpy(tl, xd->plane[0].left_context, sizeof(ENTROPY_CONTEXT) * bh); +void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + struct optimize_ctx ctx; + struct optimize_block_args arg = {cm, x, &ctx}; + vp9_optimize_init(&x->e_mbd, bsize, &ctx); + foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg); +} - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; +struct encode_b_args { + VP9_COMMON *cm; + MACROBLOCK *x; + struct optimize_ctx *ctx; +}; - optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4, bh * bw); +static void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct encode_b_args* const args = arg; + MACROBLOCK* const x = args->x; + MACROBLOCKD* const xd = &x->e_mbd; + const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x); + const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + int16_t* const src_diff = raster_block_offset_int16(xd, bsize, plane, + raster_block, + x->plane[plane].src_diff); + TX_TYPE tx_type = DCT_DCT; + + switch (ss_txfrm_size / 2) { + case TX_32X32: + vp9_short_fdct32x32(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw * 2); + break; + case TX_16X16: + tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT; + if (tx_type != DCT_DCT) { + vp9_short_fht16x16(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw, tx_type); + } else { + x->fwd_txm16x16(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw * 2); + } + break; + case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; + if (tx_type != DCT_DCT) { + vp9_short_fht8x8(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw, tx_type); + } else { + x->fwd_txm8x8(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw * 2); + } + break; + case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; + if (tx_type != DCT_DCT) { + vp9_short_fht4x4(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw, tx_type); + } else { + x->fwd_txm4x4(src_diff, + BLOCK_OFFSET(x->plane[plane].coeff, block, 16), + bw * 2); + } + break; + default: + assert(0); } -} -void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - int b; - - assert(bsize == BLOCK_SIZE_SB64X64); - for (b = 256; b < 384; b += 64) { - const int plane = 1 + (b >= 320); - ENTROPY_CONTEXT *a = xd->plane[plane].above_context; - ENTROPY_CONTEXT *l = xd->plane[plane].left_context; - ENTROPY_CONTEXT a_ec, l_ec; - - a_ec = (a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3] + l[4] + l[5] + l[6] + l[7]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.plane[plane].dequant, - &a_ec, &l_ec, TX_32X32, 256); - } + vp9_quantize(x, plane, block, 16 << ss_txfrm_size, tx_type); } -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2; - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - int uvoff = 16 << (bwl + bhl); - int plane, n; - - for (plane = 1; plane < MAX_MB_PLANE; plane++) { - ENTROPY_CONTEXT ta[2], *a = xd->plane[plane].above_context; - ENTROPY_CONTEXT tl[2], *l = xd->plane[plane].left_context; - - for (n = 0; n < bw; n++, a += 4) - ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0; - for (n = 0; n < bh; n++, l += 4) - tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV, - x->e_mbd.plane[plane].dequant, - &ta[x_idx], &tl[y_idx], - TX_16X16, bh * bw * 64); - } - uvoff = (uvoff * 5) >> 2; // switch u -> v +static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct encode_b_args* const args = arg; + MACROBLOCK* const x = args->x; + MACROBLOCKD* const xd = &x->e_mbd; + const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x); + const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + int16_t* const diff = raster_block_offset_int16(xd, bsize, plane, + raster_block, + xd->plane[plane].diff); + TX_TYPE tx_type = DCT_DCT; + + xform_quant(plane, block, bsize, ss_txfrm_size, arg); + + if (x->optimize) + vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx); + + switch (ss_txfrm_size / 2) { + case TX_32X32: + vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw * 2); + break; + case TX_16X16: + tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) { + vp9_short_idct16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw * 2); + } else { + vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw, tx_type); + } + break; + case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) { + vp9_short_idct8x8(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw * 2); + } else { + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw, tx_type); + } + break; + case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; + if (tx_type == DCT_DCT) { + // this is like vp9_short_idct4x4 but has a special case around eob<=1 + // which is significant (not just an optimization) for the lossless + // case. + vp9_inverse_transform_b_4x4(xd, xd->plane[plane].eobs[block], + BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), diff, bw * 2); + } else { + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + diff, bw, tx_type); + } + break; } } -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1; - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - int uvoff = 4 << (bwl + bhl); - int plane, n; - - for (plane = 1; plane < MAX_MB_PLANE; plane++) { - ENTROPY_CONTEXT ta[4], *a = xd->plane[plane].above_context; - ENTROPY_CONTEXT tl[4], *l = xd->plane[plane].left_context; - - for (n = 0; n < bw; n++, a += 2) - ta[n] = (a[0] + a[1]) != 0; - for (n = 0; n < bh; n++, l += 2) - tl[n] = (l[0] + l[1]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV, - x->e_mbd.plane[plane].dequant, - &ta[x_idx], &tl[y_idx], - TX_8X8, bh * bw * 16); - } - uvoff = (uvoff * 5) >> 2; // switch u -> v - } +void vp9_xform_quant_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct encode_b_args arg = {cm, x, NULL}; + + foreach_transformed_block_in_plane(xd, bsize, 0, + xform_quant, &arg); } -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - int uvoff = 1 << (bwl + bhl); - int plane, n; - - for (plane = 1; plane < MAX_MB_PLANE; plane++) { - ENTROPY_CONTEXT ta[8], tl[8]; - - vpx_memcpy(ta, xd->plane[plane].above_context, - sizeof(ENTROPY_CONTEXT) * bw); - vpx_memcpy(tl, xd->plane[plane].left_context, - sizeof(ENTROPY_CONTEXT) * bh); - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV, - x->e_mbd.plane[plane].dequant, - &ta[x_idx], &tl[y_idx], - TX_4X4, bh * bw * 4); - } - uvoff = (uvoff * 5) >> 2; // switch u -> v - } +void vp9_xform_quant_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct encode_b_args arg = {cm, x, NULL}; + + foreach_transformed_block_uv(xd, bsize, xform_quant, &arg); } -void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; - - if (tx_size == TX_16X16) { - vp9_transform_sby_16x16(x, BLOCK_SIZE_MB16X16); - vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) { - vp9_optimize_sby_16x16(cm, x, BLOCK_SIZE_MB16X16); - vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16); - } - vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); - } else if (tx_size == TX_8X8) { - vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sby_8x8(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16); - if (xd->mode_info_context->mbmi.mode == SPLITMV) { - assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4); - vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); - } else { - vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); - } - } else { - vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - if (x->optimize) { - vp9_optimize_sby_4x4(cm, x, BLOCK_SIZE_MB16X16); - vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16); - } - vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); - } +void vp9_encode_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; + + vp9_subtract_sby(x, bsize); + if (x->optimize) + vp9_optimize_init(xd, bsize, &ctx); + + foreach_transformed_block_in_plane(xd, bsize, 0, + encode_block, &arg); + + vp9_recon_sby(xd, bsize); } -void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int mi_row, int mi_col) { - MACROBLOCKD *const xd = &x->e_mbd; +void vp9_encode_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; + + vp9_subtract_sbuv(x, bsize); + if (x->optimize) + vp9_optimize_init(xd, bsize, &ctx); + + foreach_transformed_block_uv(xd, bsize, encode_block, &arg); - vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16); - vp9_subtract_sb(x, BLOCK_SIZE_MB16X16); - vp9_fidct_mb(cm, x); - vp9_recon_sb(xd, BLOCK_SIZE_MB16X16); + vp9_recon_sbuv(xd, bsize); } -/* this function is used by first pass only */ -void vp9_encode_inter16x16y(MACROBLOCK *x, int mi_row, int mi_col) { - MACROBLOCKD *xd = &x->e_mbd; +void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_SIZE_MB16X16); - vp9_subtract_sby(x, BLOCK_SIZE_MB16X16); + vp9_subtract_sb(x, bsize); + if (x->optimize) + vp9_optimize_init(xd, bsize, &ctx); - vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); + foreach_transformed_block(xd, bsize, encode_block, &arg); - vp9_recon_sby(xd, BLOCK_SIZE_MB16X16); + vp9_recon_sb(xd, bsize); } diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index da134a86b..afbe4466b 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -22,39 +22,29 @@ typedef struct { MV_REFERENCE_FRAME second_ref_frame; } MODE_DEFINITION; - -struct VP9_ENCODER_RTCD; -void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int mb_row, int mb_col); - -void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); - -void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); -void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); -void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, +struct optimize_ctx { + ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; + ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; +}; +void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + struct optimize_ctx *ctx); +void vp9_optimize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, VP9_COMMON *cm, MACROBLOCK *x, + struct optimize_ctx *ctx); +void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); + +void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_encode_sby(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_encode_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); + +void vp9_xform_quant_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_xform_quant_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); -void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); -void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); -void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); -void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); - -void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_subtract_block(int rows, int cols, int16_t *diff_ptr, int diff_stride, diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index e4d68630d..a1898af48 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -14,7 +14,6 @@ #include "vp9/encoder/vp9_onyx_int.h" #include "vp9/encoder/vp9_variance.h" #include "vp9/encoder/vp9_encodeintra.h" -#include "vp9/common/vp9_setupintrarecon.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_firstpass.h" #include "vpx_scale/vpx_scale.h" @@ -247,8 +246,8 @@ static void avg_stats(FIRSTPASS_STATS *section) { // Calculate a modified Error used in distributing bits between easier and harder frames static double calculate_modified_err(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { - double av_err = (cpi->twopass.total_stats->ssim_weighted_pred_err / - cpi->twopass.total_stats->count); + double av_err = (cpi->twopass.total_stats.ssim_weighted_pred_err / + cpi->twopass.total_stats.count); double this_err = this_frame->ssim_weighted_pred_err; double modified_err; @@ -328,7 +327,7 @@ static int frame_max_bits(VP9_COMP *cpi) { // For VBR base this on the bits and frames left plus the // two_pass_vbrmax_section rate passed in by the user. max_bits = (int) (((double) cpi->twopass.bits_left - / (cpi->twopass.total_stats->count - (double) cpi->common + / (cpi->twopass.total_stats.count - (double) cpi->common .current_video_frame)) * ((double) cpi->oxcf.two_pass_vbrmax_section / 100.0)); @@ -340,11 +339,11 @@ static int frame_max_bits(VP9_COMP *cpi) { } void vp9_init_first_pass(VP9_COMP *cpi) { - zero_stats(cpi->twopass.total_stats); + zero_stats(&cpi->twopass.total_stats); } void vp9_end_first_pass(VP9_COMP *cpi) { - output_stats(cpi, cpi->output_pkt_list, cpi->twopass.total_stats); + output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.total_stats); } static void zz_motion_search(VP9_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset) { @@ -486,8 +485,6 @@ void vp9_first_pass(VP9_COMP *cpi) { vp9_setup_block_dptrs(&x->e_mbd); - // set up frame new frame for intra coded blocks - vp9_setup_intra_recon(new_yv12); vp9_frame_init_quantizer(cpi); // Initialise the MV cost table to the defaults @@ -521,9 +518,9 @@ void vp9_first_pass(VP9_COMP *cpi) { int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); set_mi_row_col(cm, xd, - mb_row << CONFIG_SB8X8, + mb_row << 1, 1 << mi_height_log2(BLOCK_SIZE_MB16X16), - mb_col << CONFIG_SB8X8, + mb_col << 1, 1 << mi_height_log2(BLOCK_SIZE_MB16X16)); xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset; @@ -626,7 +623,10 @@ void vp9_first_pass(VP9_COMP *cpi) { this_error = motion_error; vp9_set_mbmode_and_mvs(x, NEWMV, &mv); xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_encode_inter16x16y(x, mb_row, mb_col); + vp9_build_inter_predictors_sby(xd, mb_row << 1, + mb_col << 1, + BLOCK_SIZE_MB16X16); + vp9_encode_sb(cm, x, BLOCK_SIZE_MB16X16); sum_mvr += mv.as_mv.row; sum_mvr_abs += abs(mv.as_mv.row); sum_mvc += mv.as_mv.col; @@ -751,20 +751,20 @@ void vp9_first_pass(VP9_COMP *cpi) { - cpi->source->ts_start); // don't want to do output stats with a stack variable! - memcpy(cpi->twopass.this_frame_stats, + memcpy(&cpi->twopass.this_frame_stats, &fps, sizeof(FIRSTPASS_STATS)); - output_stats(cpi, cpi->output_pkt_list, cpi->twopass.this_frame_stats); - accumulate_stats(cpi->twopass.total_stats, &fps); + output_stats(cpi, cpi->output_pkt_list, &cpi->twopass.this_frame_stats); + accumulate_stats(&cpi->twopass.total_stats, &fps); } // Copy the previous Last Frame back into gf and and arf buffers if // the prediction is good enough... but also dont allow it to lag too far if ((cpi->twopass.sr_update_lag > 3) || ((cm->current_video_frame > 0) && - (cpi->twopass.this_frame_stats->pcnt_inter > 0.20) && - ((cpi->twopass.this_frame_stats->intra_error / - DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats->coded_error)) > + (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) && + ((cpi->twopass.this_frame_stats.intra_error / + DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats.coded_error)) > 2.0))) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); cpi->twopass.sr_update_lag = 1; @@ -995,7 +995,7 @@ static int estimate_max_q(VP9_COMP *cpi, // Give average a chance to settle though. // PGW TODO.. This code is broken for the extended Q range if ((cpi->ni_frames > - ((int)cpi->twopass.total_stats->count >> 8)) && + ((int)cpi->twopass.total_stats.count >> 8)) && (cpi->ni_frames > 25)) { adjust_maxq_qrange(cpi); } @@ -1052,8 +1052,8 @@ static int estimate_cq(VP9_COMP *cpi, } // II ratio correction factor for clip as a whole - clip_iiratio = cpi->twopass.total_stats->intra_error / - DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats->coded_error); + clip_iiratio = cpi->twopass.total_stats.intra_error / + DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error); clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); if (clip_iifactor < 0.80) clip_iifactor = 0.80; @@ -1098,14 +1098,14 @@ void vp9_init_second_pass(VP9_COMP *cpi) { if (two_pass_min_rate < lower_bounds_min_rate) two_pass_min_rate = lower_bounds_min_rate; - zero_stats(cpi->twopass.total_stats); - zero_stats(cpi->twopass.total_left_stats); + zero_stats(&cpi->twopass.total_stats); + zero_stats(&cpi->twopass.total_left_stats); if (!cpi->twopass.stats_in_end) return; - *cpi->twopass.total_stats = *cpi->twopass.stats_in_end; - *cpi->twopass.total_left_stats = *cpi->twopass.total_stats; + cpi->twopass.total_stats = *cpi->twopass.stats_in_end; + cpi->twopass.total_left_stats = cpi->twopass.total_stats; // each frame can have a different duration, as the frame rate in the source // isn't guaranteed to be constant. The frame rate prior to the first frame @@ -1113,13 +1113,13 @@ void vp9_init_second_pass(VP9_COMP *cpi) { // Its calculated based on the actual durations of all frames from the first // pass. vp9_new_frame_rate(cpi, - 10000000.0 * cpi->twopass.total_stats->count / - cpi->twopass.total_stats->duration); + 10000000.0 * cpi->twopass.total_stats.count / + cpi->twopass.total_stats.duration); cpi->output_frame_rate = cpi->oxcf.frame_rate; - cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats->duration * + cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0); - cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats->duration * + cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0); // Calculate a minimum intra value to be used in determining the IIratio @@ -1145,7 +1145,8 @@ void vp9_init_second_pass(VP9_COMP *cpi) { sum_iiratio += IIRatio; } - cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats->count); + cpi->twopass.avg_iiratio = sum_iiratio / + DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count); // Reset file position reset_fpf_position(cpi, start_pos); @@ -1828,7 +1829,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. // This is also important for short clips where there may only be one // key frame. - if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats->count - + if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame)) { cpi->twopass.kf_group_bits = (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; @@ -2096,7 +2097,7 @@ static int adjust_active_maxq(int old_maxqi, int new_maxqi) { void vp9_second_pass(VP9_COMP *cpi) { int tmp_q; - int frames_left = (int)(cpi->twopass.total_stats->count - + int frames_left = (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame); FIRSTPASS_STATS this_frame; @@ -2121,7 +2122,7 @@ void vp9_second_pass(VP9_COMP *cpi) { est_cq = estimate_cq(cpi, - cpi->twopass.total_left_stats, + &cpi->twopass.total_left_stats, (int)(cpi->twopass.bits_left / frames_left)); cpi->cq_target_quality = cpi->oxcf.cq_level; @@ -2135,7 +2136,7 @@ void vp9_second_pass(VP9_COMP *cpi) { tmp_q = estimate_max_q( cpi, - cpi->twopass.total_left_stats, + &cpi->twopass.total_left_stats, (int)(cpi->twopass.bits_left / frames_left)); cpi->active_worst_quality = tmp_q; @@ -2158,15 +2159,15 @@ void vp9_second_pass(VP9_COMP *cpi) { // radical adjustments to the allowed quantizer range just to use up a // few surplus bits or get beneath the target rate. else if ((cpi->common.current_video_frame < - (((unsigned int)cpi->twopass.total_stats->count * 255) >> 8)) && + (((unsigned int)cpi->twopass.total_stats.count * 255) >> 8)) && ((cpi->common.current_video_frame + cpi->baseline_gf_interval) < - (unsigned int)cpi->twopass.total_stats->count)) { + (unsigned int)cpi->twopass.total_stats.count)) { if (frames_left < 1) frames_left = 1; tmp_q = estimate_max_q( cpi, - cpi->twopass.total_left_stats, + &cpi->twopass.total_left_stats, (int)(cpi->twopass.bits_left / frames_left)); // Make a damped adjustment to active max Q @@ -2245,7 +2246,7 @@ void vp9_second_pass(VP9_COMP *cpi) { cpi->twopass.frames_to_key--; // Update the total stats remaining structure - subtract_stats(cpi->twopass.total_left_stats, &this_frame); + subtract_stats(&cpi->twopass.total_left_stats, &this_frame); } static int test_candidate_kf(VP9_COMP *cpi, diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index fe5d114ba..018c86cb9 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -11,7 +11,6 @@ #include <limits.h> #include <vp9/encoder/vp9_encodeintra.h> #include <vp9/encoder/vp9_rdopt.h> -#include <vp9/common/vp9_setupintrarecon.h> #include <vp9/common/vp9_blockd.h> #include <vp9/common/vp9_reconinter.h> #include <vp9/common/vp9_systemdependent.h> @@ -386,7 +385,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) { // goes in segment 0 if (arf_not_zz[offset + mb_col]) { ncnt[0]++; -#if CONFIG_SB8X8 cpi->segmentation_map[offset * 4 + 2 * mb_col] = 0; cpi->segmentation_map[offset * 4 + 2 * mb_col + 1] = 0; cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols] = 0; @@ -396,11 +394,6 @@ static void separate_arf_mbs(VP9_COMP *cpi) { cpi->segmentation_map[offset * 4 + 2 * mb_col + 1] = 1; cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols] = 1; cpi->segmentation_map[offset * 4 + 2 * mb_col + cm->mi_cols + 1] = 1; -#else - cpi->segmentation_map[offset + mb_col] = 0; - } else { - cpi->segmentation_map[offset + mb_col] = 1; -#endif ncnt[1]++; } } @@ -419,10 +412,10 @@ static void separate_arf_mbs(VP9_COMP *cpi) { cpi->static_mb_pct = 0; cpi->seg0_cnt = ncnt[0]; - vp9_enable_segmentation((VP9_PTR) cpi); + vp9_enable_segmentation((VP9_PTR)cpi); } else { cpi->static_mb_pct = 0; - vp9_disable_segmentation((VP9_PTR) cpi); + vp9_disable_segmentation((VP9_PTR)cpi); } // Free localy allocated storage diff --git a/vp9/encoder/vp9_modecosts.c b/vp9/encoder/vp9_modecosts.c index 7d9462f94..e26daf0c9 100644 --- a/vp9/encoder/vp9_modecosts.c +++ b/vp9/encoder/vp9_modecosts.c @@ -41,8 +41,6 @@ void vp9_init_mode_costs(VP9_COMP *c) { x->fc.uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree); vp9_cost_tokens(c->mb.intra_uv_mode_cost[0], x->kf_uv_mode_prob[VP9_YMODES - 1], vp9_uv_mode_tree); - vp9_cost_tokens(c->mb.i8x8_mode_costs, - x->fc.i8x8_mode_prob, vp9_i8x8_mode_tree); for (i = 0; i <= VP9_SWITCHABLE_FILTERS; ++i) vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i], diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 0af232eed..ffee34eb7 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -280,8 +280,7 @@ static void setup_features(VP9_COMP *cpi) { MACROBLOCKD *xd = &cpi->mb.e_mbd; // Set up default state for MB feature flags - - xd->segmentation_enabled = 0; // Default segmentation disabled + xd->segmentation_enabled = 0; xd->update_mb_segmentation_map = 0; xd->update_mb_segmentation_data = 0; @@ -333,15 +332,6 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->mb.pip); cpi->mb.pip = 0; - - vpx_free(cpi->twopass.total_stats); - cpi->twopass.total_stats = 0; - - vpx_free(cpi->twopass.total_left_stats); - cpi->twopass.total_left_stats = 0; - - vpx_free(cpi->twopass.this_frame_stats); - cpi->twopass.this_frame_stats = 0; } // Computes a q delta (in "q index" terms) to get from a starting q value @@ -383,7 +373,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { xd->update_mb_segmentation_map = 0; xd->update_mb_segmentation_data = 0; #if CONFIG_IMPLICIT_SEGMENTATION - xd->allow_implicit_segment_update = 0; + xd->allow_implicit_segment_update = 0; #endif cpi->static_mb_pct = 0; @@ -399,7 +389,7 @@ static void configure_static_seg_features(VP9_COMP *cpi) { xd->update_mb_segmentation_map = 0; xd->update_mb_segmentation_data = 0; #if CONFIG_IMPLICIT_SEGMENTATION - xd->allow_implicit_segment_update = 0; + xd->allow_implicit_segment_update = 0; #endif cpi->static_mb_pct = 0; @@ -428,9 +418,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) { xd->mb_segment_abs_delta = SEGMENT_DELTADATA; } - } - // All other frames if segmentation has been enabled - else if (xd->segmentation_enabled) { + } else if (xd->segmentation_enabled) { + // All other frames if segmentation has been enabled + // First normal frame in a valid gf or alt ref group if (cpi->common.frames_since_golden == 0) { // Set up segment features for normal frames in an arf group @@ -454,10 +444,10 @@ static void configure_static_seg_features(VP9_COMP *cpi) { vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME); vp9_enable_segfeature(xd, 1, SEG_LVL_SKIP); } - } - // Disable segmentation and clear down features if alt ref - // is not active for this group - else { + } else { + // Disable segmentation and clear down features if alt ref + // is not active for this group + vp9_disable_segmentation((VP9_PTR)cpi); vpx_memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); @@ -467,12 +457,11 @@ static void configure_static_seg_features(VP9_COMP *cpi) { vp9_clearall_segfeatures(xd); } - } + } else if (cpi->is_src_frame_alt_ref) { + // Special case where we are coding over the top of a previous + // alt ref frame. + // Segment coding disabled for compred testing - // Special case where we are coding over the top of a previous - // alt ref frame. - // Segment coding disabled for compred testing - else if (cpi->is_src_frame_alt_ref) { // Enable ref frame features for segment 0 as well vp9_enable_segfeature(xd, 0, SEG_LVL_REF_FRAME); vp9_enable_segfeature(xd, 1, SEG_LVL_REF_FRAME); @@ -490,9 +479,9 @@ static void configure_static_seg_features(VP9_COMP *cpi) { } // Enable data udpate xd->update_mb_segmentation_data = 1; - } - // All other frames. - else { + } else { + // All other frames. + // No updates.. leave things as they are. xd->update_mb_segmentation_map = 0; xd->update_mb_segmentation_data = 0; @@ -628,7 +617,6 @@ static void set_rd_speed_thresholds(VP9_COMP *cpi, int mode, int speed) { sf->thresh_mult[THR_D63_PRED ] += speed_multiplier * 1500; sf->thresh_mult[THR_B_PRED ] += speed_multiplier * 2500; - sf->thresh_mult[THR_I8X8_PRED] += speed_multiplier * 2500; sf->thresh_mult[THR_NEWMV ] += speed_multiplier * 1000; sf->thresh_mult[THR_NEWG ] += speed_multiplier * 1000; @@ -867,9 +855,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { } cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4; - cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair; - cpi->mb.quantize_b_8x8 = vp9_regular_quantize_b_8x8; - cpi->mb.quantize_b_16x16 = vp9_regular_quantize_b_16x16; vp9_init_quantizer(cpi); @@ -959,23 +944,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { CHECK_MEM_ERROR(cpi->mb_norm_activity_map, vpx_calloc(sizeof(unsigned int), cm->mb_rows * cm->mb_cols)); - - vpx_free(cpi->twopass.total_stats); - - cpi->twopass.total_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS)); - - vpx_free(cpi->twopass.total_left_stats); - cpi->twopass.total_left_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS)); - - vpx_free(cpi->twopass.this_frame_stats); - - cpi->twopass.this_frame_stats = vpx_calloc(1, sizeof(FIRSTPASS_STATS)); - - if (!cpi->twopass.total_stats || - !cpi->twopass.total_left_stats || - !cpi->twopass.this_frame_stats) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate firstpass stats"); } @@ -1647,6 +1615,12 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { BFP(BLOCK_8X8, vp9_sad8x8, vp9_variance8x8, vp9_sub_pixel_variance8x8, NULL, NULL, NULL, vp9_sad8x8x3, vp9_sad8x8x8, vp9_sad8x8x4d) + BFP(BLOCK_4X8, NULL, vp9_variance4x8, NULL, + NULL, NULL, NULL, NULL, NULL, NULL) + + BFP(BLOCK_8X4, NULL, vp9_variance8x4, NULL, + NULL, NULL, NULL, NULL, NULL, NULL) + BFP(BLOCK_4X4, vp9_sad4x4, vp9_variance4x4, vp9_sub_pixel_variance4x4, NULL, NULL, NULL, vp9_sad4x4x3, vp9_sad4x4x8, vp9_sad4x4x4d) @@ -3326,9 +3300,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_copy(cpi->common.fc.ymode_counts, cpi->ymode_count); vp9_copy(cpi->common.fc.uv_mode_counts, cpi->y_uv_mode_count); vp9_copy(cpi->common.fc.bmode_counts, cpi->bmode_count); - vp9_copy(cpi->common.fc.i8x8_mode_counts, cpi->i8x8_mode_count); vp9_copy(cpi->common.fc.sub_mv_ref_counts, cpi->sub_mv_ref_count); - vp9_copy(cpi->common.fc.mbsplit_counts, cpi->mbsplit_count); vp9_copy(cpi->common.fc.partition_counts, cpi->partition_count); #if CONFIG_COMP_INTERINTRA_PRED vp9_copy(cpi->common.fc.interintra_counts, cpi->interintra_count); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index aeaf1bda3..cc91ba5d2 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -48,9 +48,9 @@ #define KEY_FRAME_CONTEXT 5 #if CONFIG_COMP_INTERINTRA_PRED -#define MAX_MODES 54 +#define MAX_MODES 53 #else -#define MAX_MODES 42 +#define MAX_MODES 41 #endif #define MIN_THRESHMULT 32 @@ -72,7 +72,6 @@ typedef struct { // Stats int y_modes[VP9_YMODES]; int uv_modes[VP9_UV_MODES]; - int i8x8_modes[VP9_I8X8_MODES]; int b_modes[B_MODE_COUNT]; int inter_y_modes[MB_MODE_COUNT]; int inter_uv_modes[VP9_UV_MODES]; @@ -100,9 +99,7 @@ typedef struct { vp9_prob ymode_prob[VP9_YMODES - 1]; /* interframe intra mode probs */ vp9_prob uv_mode_prob[VP9_YMODES][VP9_UV_MODES - 1]; vp9_prob bmode_prob[VP9_NKF_BINTRAMODES - 1]; - vp9_prob i8x8_mode_prob[VP9_I8X8_MODES - 1]; vp9_prob sub_mv_ref_prob[SUBMVREF_COUNT][VP9_SUBMVREFS - 1]; - vp9_prob mbsplit_prob[VP9_NUMMBSPLITS - 1]; vp9_prob partition_prob[NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1]; vp9_prob switchable_interp_prob[VP9_SWITCHABLE_FILTERS + 1] @@ -207,7 +204,6 @@ typedef enum { THR_SPLITA, THR_B_PRED, - THR_I8X8_PRED, THR_COMP_ZEROLG, THR_COMP_NEARESTLG, @@ -273,10 +269,12 @@ typedef struct { } SPEED_FEATURES; enum BlockSize { - BLOCK_16X8 = PARTITIONING_16X8, - BLOCK_8X16 = PARTITIONING_8X16, - BLOCK_8X8 = PARTITIONING_8X8, - BLOCK_4X4 = PARTITIONING_4X4, + BLOCK_4X4, + BLOCK_4X8, + BLOCK_8X4, + BLOCK_8X8, + BLOCK_8X16, + BLOCK_16X8, BLOCK_16X16, BLOCK_MAX_SEGMENTS, BLOCK_32X32 = BLOCK_MAX_SEGMENTS, @@ -451,9 +449,7 @@ typedef struct VP9_COMP { int sb_ymode_count [VP9_I32X32_MODES]; int ymode_count[VP9_YMODES]; /* intra MB type cts this frame */ int bmode_count[VP9_NKF_BINTRAMODES]; - int i8x8_mode_count[VP9_I8X8_MODES]; int sub_mv_ref_count[SUBMVREF_COUNT][VP9_SUBMVREFS]; - int mbsplit_count[VP9_NUMMBSPLITS]; int y_uv_mode_count[VP9_YMODES][VP9_UV_MODES]; unsigned int partition_count[NUM_PARTITION_CONTEXTS][PARTITION_TYPES]; #if CONFIG_COMP_INTERINTRA_PRED @@ -557,10 +553,10 @@ typedef struct VP9_COMP { unsigned int section_intra_rating; unsigned int next_iiratio; unsigned int this_iiratio; - FIRSTPASS_STATS *total_stats; - FIRSTPASS_STATS *this_frame_stats; + FIRSTPASS_STATS total_stats; + FIRSTPASS_STATS this_frame_stats; FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start; - FIRSTPASS_STATS *total_left_stats; + FIRSTPASS_STATS total_left_stats; int first_pass_done; int64_t bits_left; int64_t clip_bits_total; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 77e19721c..4ed8f6326 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -75,57 +75,52 @@ static void quantize(int16_t *zbin_boost_orig_ptr, *eob_ptr = eob + 1; } -void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, - int y_blocks) { +void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coeffs, + TX_TYPE tx_type) { MACROBLOCKD *const xd = &mb->e_mbd; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); - const int *pt_scan = get_scan_4x4(tx_type); + const int mul = n_coeffs == 1024 ? 2 : 1; + const int *scan; + + // These contexts may be available in the caller + switch (n_coeffs) { + case 4 * 4: + scan = get_scan_4x4(tx_type); + break; + case 8 * 8: + scan = get_scan_8x8(tx_type); + break; + case 16 * 16: + scan = get_scan_16x16(tx_type); + break; + default: + scan = vp9_default_zig_zag1d_32x32; + break; + } - quantize(mb->plane[pb_idx.plane].zrun_zbin_boost, - BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16), - 16, mb->skip_block, - mb->plane[pb_idx.plane].zbin, - mb->plane[pb_idx.plane].round, - mb->plane[pb_idx.plane].quant, - mb->plane[pb_idx.plane].quant_shift, - BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), - BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), - xd->plane[pb_idx.plane].dequant, - mb->plane[pb_idx.plane].zbin_extra, - &xd->plane[pb_idx.plane].eobs[pb_idx.block], - pt_scan, 1); + quantize(mb->plane[plane].zrun_zbin_boost, + BLOCK_OFFSET(mb->plane[plane].coeff, block, 16), + n_coeffs, mb->skip_block, + mb->plane[plane].zbin, + mb->plane[plane].round, + mb->plane[plane].quant, + mb->plane[plane].quant_shift, + BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16), + BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), + xd->plane[plane].dequant, + mb->plane[plane].zbin_extra, + &xd->plane[plane].eobs[block], + scan, mul); } -void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, +void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); - const int *pt_scan = get_scan_8x8(tx_type); - - quantize(mb->plane[pb_idx.plane].zrun_zbin_boost, - BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16), - 64, mb->skip_block, - mb->plane[pb_idx.plane].zbin, - mb->plane[pb_idx.plane].round, - mb->plane[pb_idx.plane].quant, - mb->plane[pb_idx.plane].quant_shift, - BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), - BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), - xd->plane[pb_idx.plane].dequant, - mb->plane[pb_idx.plane].zbin_extra, - &xd->plane[pb_idx.plane].eobs[pb_idx.block], - pt_scan, 1); -} - -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, - int y_blocks) { - MACROBLOCKD *const xd = &mb->e_mbd; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); - const int *pt_scan = get_scan_16x16(tx_type); + const int *pt_scan = get_scan_4x4(tx_type); quantize(mb->plane[pb_idx.plane].zrun_zbin_boost, BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16), - 256, mb->skip_block, + 16, mb->skip_block, mb->plane[pb_idx.plane].zbin, mb->plane[pb_idx.plane].round, mb->plane[pb_idx.plane].quant, @@ -138,120 +133,6 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, pt_scan, 1); } -void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) { - MACROBLOCKD *const xd = &mb->e_mbd; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); - - quantize(mb->plane[pb_idx.plane].zrun_zbin_boost, - BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16), - 1024, mb->skip_block, - mb->plane[pb_idx.plane].zbin, - mb->plane[pb_idx.plane].round, - mb->plane[pb_idx.plane].quant, - mb->plane[pb_idx.plane].quant_shift, - BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), - BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), - xd->plane[pb_idx.plane].dequant, - mb->plane[pb_idx.plane].zbin_extra, - &xd->plane[pb_idx.plane].eobs[pb_idx.block], - vp9_default_zig_zag1d_32x32, 2); -} - -void vp9_quantize_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bw = 1 << (b_width_log2(bsize) - 3); - const int bh = 1 << (b_height_log2(bsize) - 3); - int n; - - for (n = 0; n < bw * bh; n++) - vp9_regular_quantize_b_32x32(x, n * 64, bw * bh * 64); -} - -void vp9_quantize_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - const int bstride = 16 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, - 4 * x_idx + y_idx * bstride); - x->quantize_b_16x16(x, n * 16, tx_type, 16 * bw * bh); - } -} - -void vp9_quantize_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - const int bstride = 4 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, - 2 * x_idx + y_idx * bstride); - x->quantize_b_8x8(x, n * 4, tx_type, 4 * bw * bh); - } -} - -void vp9_quantize_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bw = 1 << bwl; - const int bh = 1 << b_height_log2(bsize); - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < bw * bh; n++) { - const TX_TYPE tx_type = get_tx_type_4x4(xd, n); - x->quantize_b_4x4(x, n, tx_type, bw * bh); - } -} - -void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - assert(bsize == BLOCK_SIZE_SB64X64); - vp9_regular_quantize_b_32x32(x, 256, 256); - vp9_regular_quantize_b_32x32(x, 320, 256); -} - -void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2; - const int bhl = b_height_log2(bsize) - 2; - const int uoff = 16 << (bhl + bwl); - int i; - - for (i = uoff; i < ((uoff * 3) >> 1); i += 16) - x->quantize_b_16x16(x, i, DCT_DCT, uoff); -} - -void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2; - const int bhl = b_height_log2(bsize) - 2; - const int uoff = 16 << (bhl + bwl); - int i; - - for (i = uoff; i < ((uoff * 3) >> 1); i += 4) - x->quantize_b_8x8(x, i, DCT_DCT, uoff); -} - -void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2; - const int bhl = b_height_log2(bsize) - 2; - const int uoff = 16 << (bhl + bwl); - int i; - - for (i = uoff; i < ((uoff * 3) >> 1); i++) - x->quantize_b_4x4(x, i, DCT_DCT, uoff); -} - -/* quantize_b_pair function pointer in MACROBLOCK structure is set to one of - * these two C functions if corresponding optimized routine is not available. - * NEON optimized version implements currently the fast quantization for pair - * of blocks. */ -void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2, - int y_blocks) { - vp9_regular_quantize_b_4x4(x, b_idx1, DCT_DCT, y_blocks); - vp9_regular_quantize_b_4x4(x, b_idx2, DCT_DCT, y_blocks); -} - static void invert_quant(int16_t *quant, uint8_t *shift, int d) { unsigned t; int l; @@ -266,6 +147,7 @@ static void invert_quant(int16_t *quant, uint8_t *shift, int d) { void vp9_init_quantizer(VP9_COMP *cpi) { int i; int quant_val; + int quant_uv_val; int q; static const int zbin_boost[16] = { 0, 0, 0, 8, 8, 8, 10, 12, @@ -293,52 +175,36 @@ void vp9_init_quantizer(VP9_COMP *cpi) { cpi->common.uv_dequant[q][0] = quant_val; cpi->zrun_zbin_boost_uv[q][0] = (quant_val * zbin_boost[0]) >> 7; + quant_val = vp9_ac_quant(q, 0); + cpi->common.y_dequant[q][1] = quant_val; + quant_uv_val = vp9_ac_quant(q, cpi->common.uv_ac_delta_q); + cpi->common.uv_dequant[q][1] = quant_uv_val; // all the 4x4 ac values =; for (i = 1; i < 16; i++) { int rc = vp9_default_zig_zag1d_4x4[i]; - quant_val = vp9_ac_quant(q, 0); invert_quant(cpi->Y1quant[q] + rc, cpi->Y1quant_shift[q] + rc, quant_val); cpi->Y1zbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); cpi->Y1round[q][rc] = (qrounding_factor * quant_val) >> 7; - cpi->common.y_dequant[q][rc] = quant_val; cpi->zrun_zbin_boost_y1[q][i] = ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7); - quant_val = vp9_ac_quant(q, cpi->common.uv_ac_delta_q); - invert_quant(cpi->UVquant[q] + rc, cpi->UVquant_shift[q] + rc, quant_val); - cpi->UVzbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_val, 7); - cpi->UVround[q][rc] = (qrounding_factor * quant_val) >> 7; - cpi->common.uv_dequant[q][rc] = quant_val; + invert_quant(cpi->UVquant[q] + rc, cpi->UVquant_shift[q] + rc, + quant_uv_val); + cpi->UVzbin[q][rc] = ROUND_POWER_OF_TWO(qzbin_factor * quant_uv_val, 7); + cpi->UVround[q][rc] = (qrounding_factor * quant_uv_val) >> 7; cpi->zrun_zbin_boost_uv[q][i] = - ROUND_POWER_OF_TWO(quant_val * zbin_boost[i], 7); + ROUND_POWER_OF_TWO(quant_uv_val * zbin_boost[i], 7); } } } void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) { int i; - int qindex; MACROBLOCKD *xd = &x->e_mbd; int zbin_extra; int segment_id = xd->mode_info_context->mbmi.segment_id; - - // Select the baseline MB Q index allowing for any segment level change. - if (vp9_segfeature_active(xd, segment_id, SEG_LVL_ALT_Q)) { - if (xd->mb_segment_abs_delta == SEGMENT_ABSDATA) { - // Abs Value - qindex = vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q); - } else { - // Delta Value - qindex = cpi->common.base_qindex + - vp9_get_segdata(xd, segment_id, SEG_LVL_ALT_Q); - - // Clamp to valid range - qindex = clamp(qindex, 0, MAXQ); - } - } else { - qindex = cpi->common.base_qindex; - } + const int qindex = vp9_get_qindex(xd, segment_id, cpi->common.base_qindex); // Y zbin_extra = (cpi->common.y_dequant[qindex][1] * diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index fd7a4bb4f..2b1eeabbe 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -22,9 +22,8 @@ #define prototype_quantize_mb(sym) \ void (sym)(MACROBLOCK *x) -#if ARCH_X86 || ARCH_X86_64 -#include "x86/vp9_quantize_x86.h" -#endif +void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coefs, + TX_TYPE tx_type); void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2, int y_blocks); @@ -32,20 +31,6 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, int y_blocks); void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, int y_blocks); -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, - int y_blocks); -void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, - int y_blocks); - -void vp9_quantize_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); - struct VP9_COMP; extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q); diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 47252253d..0f84b1a37 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -138,9 +138,7 @@ void vp9_save_coding_context(VP9_COMP *cpi) { vp9_copy(cc->sb_ymode_prob, cm->fc.sb_ymode_prob); vp9_copy(cc->bmode_prob, cm->fc.bmode_prob); vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob); - vp9_copy(cc->i8x8_mode_prob, cm->fc.i8x8_mode_prob); vp9_copy(cc->sub_mv_ref_prob, cm->fc.sub_mv_ref_prob); - vp9_copy(cc->mbsplit_prob, cm->fc.mbsplit_prob); vp9_copy(cc->partition_prob, cm->fc.partition_prob); // Stats @@ -198,10 +196,8 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { vp9_copy(cm->fc.ymode_prob, cc->ymode_prob); vp9_copy(cm->fc.sb_ymode_prob, cc->sb_ymode_prob); vp9_copy(cm->fc.bmode_prob, cc->bmode_prob); - vp9_copy(cm->fc.i8x8_mode_prob, cc->i8x8_mode_prob); vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob); vp9_copy(cm->fc.sub_mv_ref_prob, cc->sub_mv_ref_prob); - vp9_copy(cm->fc.mbsplit_prob, cc->mbsplit_prob); vp9_copy(cm->fc.partition_prob, cc->partition_prob); // Stats diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 0e85a0c71..90d56b2d2 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -13,8 +13,8 @@ #include <math.h> #include <limits.h> #include <assert.h> -#include "vp9/common/vp9_pragmas.h" +#include "vp9/common/vp9_pragmas.h" #include "vp9/encoder/vp9_tokenize.h" #include "vp9/encoder/vp9_treewriter.h" #include "vp9/encoder/vp9_onyx_int.h" @@ -34,7 +34,6 @@ #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_systemdependent.h" #include "vp9/encoder/vp9_encodemv.h" - #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_entropy.h" @@ -42,8 +41,6 @@ #include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_common.h" -#define MAXF(a,b) (((a) > (b)) ? (a) : (b)) - #define INVALID_MV 0x80008000 /* Factor to weigh the rate for switchable interp filters */ @@ -105,7 +102,6 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {SPLITMV, ALTREF_FRAME, NONE}, {I4X4_PRED, INTRA_FRAME, NONE}, - {I8X8_PRED, INTRA_FRAME, NONE}, /* compound prediction modes */ {ZEROMV, LAST_FRAME, GOLDEN_FRAME}, @@ -155,11 +151,9 @@ static void fill_token_costs(vp9_coeff_count *c, for (i = 0; i < BLOCK_TYPES; i++) for (j = 0; j < REF_TYPES; j++) for (k = 0; k < COEF_BANDS; k++) - for (l = 0; l < PREV_COEF_CONTEXTS; l++) { - vp9_cost_tokens_skip((int *)(c[i][j][k][l]), - p[i][j][k][l], + for (l = 0; l < PREV_COEF_CONTEXTS; l++) + vp9_cost_tokens_skip((int *)c[i][j][k][l], p[i][j][k][l], vp9_coef_tree); - } } static int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, @@ -182,7 +176,7 @@ void vp9_init_me_luts() { for (i = 0; i < QINDEX_RANGE; i++) { sad_per_bit16lut[i] = (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107); - sad_per_bit4lut[i] = (int)((0.063 * vp9_convert_qindex_to_q(i)) + 2.742); + sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742); } } @@ -206,7 +200,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { // for key frames, golden frames and arf frames. // if (cpi->common.refresh_golden_frame || // cpi->common.refresh_alt_ref_frame) - qindex = (qindex < 0) ? 0 : ((qindex > MAXQ) ? MAXQ : qindex); + qindex = clamp(qindex, 0, MAXQ); cpi->RDMULT = compute_rd_mult(qindex); if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { @@ -291,7 +285,7 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { } static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, - int ib, PLANE_TYPE type, + int plane, int block, PLANE_TYPE type, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, TX_SIZE tx_size, @@ -302,10 +296,9 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, int c = 0; int cost = 0, pad; const int *scan, *nb; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib); - const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block]; - const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, - pb_idx.block, 16); + const int eob = xd->plane[plane].eobs[block]; + const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, + block, 16); const int ref = mbmi->ref_frame != INTRA_FRAME; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; @@ -332,7 +325,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, #endif // Check for consistency of tx_size with mode info - assert((!type && !pb_idx.plane) || (type && pb_idx.plane)); + assert((!type && !plane) || (type && plane)); if (type == PLANE_TYPE_Y_WITH_DC) { assert(xd->mode_info_context->mbmi.txfm_size == tx_size); } else { @@ -343,7 +336,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, switch (tx_size) { case TX_4X4: { tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_4x4(xd, ib) : DCT_DCT; + get_tx_type_4x4(xd, block) : DCT_DCT; above_ec = A[0] != 0; left_ec = L[0] != 0; coef_probs = cm->fc.coef_probs_4x4; @@ -357,7 +350,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, case TX_8X8: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; const int sz = 1 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; + const int x = block & ((1 << sz) - 1), y = block - x; TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; above_ec = (A[0] + A[1]) != 0; @@ -373,7 +366,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, case TX_16X16: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; const int sz = 2 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; + const int x = block & ((1 << sz) - 1), y = block - x; TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; scan = get_scan_16x16(tx_type); @@ -563,17 +556,19 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_32X32; - } else if ( cm->txfm_mode == ALLOW_16X16 || - (max_txfm_size == TX_16X16 && cm->txfm_mode == ALLOW_32X32) || - (cm->txfm_mode == TX_MODE_SELECT && - rd[TX_16X16][1] < rd[TX_8X8][1] && - rd[TX_16X16][1] < rd[TX_4X4][1])) { + } else if (max_txfm_size >= TX_16X16 && + (cm->txfm_mode == ALLOW_16X16 || + cm->txfm_mode == ALLOW_32X32 || + (cm->txfm_mode == TX_MODE_SELECT && + rd[TX_16X16][1] < rd[TX_8X8][1] && + rd[TX_16X16][1] < rd[TX_4X4][1]))) { mbmi->txfm_size = TX_16X16; } else if (cm->txfm_mode == ALLOW_8X8 || + cm->txfm_mode == ALLOW_16X16 || + cm->txfm_mode == ALLOW_32X32 || (cm->txfm_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) { mbmi->txfm_size = TX_8X8; } else { - assert(cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT); mbmi->txfm_size = TX_4X4; } @@ -583,13 +578,14 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, txfm_cache[ONLY_4X4] = rd[TX_4X4][0]; txfm_cache[ALLOW_8X8] = rd[TX_8X8][0]; - txfm_cache[ALLOW_16X16] = rd[TX_16X16][0]; - txfm_cache[ALLOW_32X32] = rd[max_txfm_size][0]; + txfm_cache[ALLOW_16X16] = rd[MIN(max_txfm_size, TX_16X16)][0]; + txfm_cache[ALLOW_32X32] = rd[MIN(max_txfm_size, TX_32X32)][0]; if (max_txfm_size == TX_32X32 && rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_32X32][1]; - else if (rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) + else if (max_txfm_size >= TX_16X16 && + rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) txfm_cache[TX_MODE_SELECT] = rd[TX_16X16][1]; else txfm_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ? @@ -603,16 +599,17 @@ static int block_error(int16_t *coeff, int16_t *dqcoeff, for (i = 0; i < block_size; i++) { int this_diff = coeff[i] - dqcoeff[i]; - error += this_diff * this_diff; + error += (unsigned)this_diff * this_diff; } error >>= shift; return error > INT_MAX ? INT_MAX : (int)error; } -static int block_error_sby(MACROBLOCK *x, int block_size, int shift) { +static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff, - block_size, shift); + 16 << (bwl + bhl), shift); } static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { @@ -630,155 +627,54 @@ static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { return sum > INT_MAX ? INT_MAX : (int)sum; } -static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bw = 1 << bwl; - const int bh = 1 << b_height_log2(bsize); - int cost = 0, b; +static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x, + int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; + const int bw = 1 << bwl, bh = 1 << bhl; ENTROPY_CONTEXT t_above[16], t_left[16]; + int block, cost; - vpx_memcpy(&t_above, xd->plane[0].above_context, + vpx_memcpy(&t_above, xd->plane[plane].above_context, sizeof(ENTROPY_CONTEXT) * bw); - vpx_memcpy(&t_left, xd->plane[0].left_context, + vpx_memcpy(&t_left, xd->plane[plane].left_context, sizeof(ENTROPY_CONTEXT) * bh); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, - t_above + x_idx, t_left + y_idx, - TX_4X4, bw * bh); - } - - return cost; -} - -static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - MACROBLOCKD *const xd = &x->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_transform_sby_4x4(x, bsize); - vp9_quantize_sby_4x4(x, bsize); - - *distortion = block_error_sby(x, 16 << (bwl + bhl), 2); - *rate = rdcost_sby_4x4(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize); -} - -static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - int cost = 0, b; - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT t_above[16], t_left[16]; - - vpx_memcpy(&t_above, xd->plane[0].above_context, - sizeof(ENTROPY_CONTEXT) * 2 * bw); - vpx_memcpy(&t_left, xd->plane[0].left_context, - sizeof(ENTROPY_CONTEXT) * 2 * bh); - - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_Y_WITH_DC, - t_above + x_idx * 2, t_left + y_idx * 2, - TX_8X8, 4 * bw * bh); - } - - return cost; -} - -static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - MACROBLOCKD *const xd = &x->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - vp9_transform_sby_8x8(x, bsize); - vp9_quantize_sby_8x8(x, bsize); + cost = 0; + for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) { + int x_idx, y_idx; - *distortion = block_error_sby(x, 16 << (bhl + bwl), 2); - *rate = rdcost_sby_8x8(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize); -} - -static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - int cost = 0, b; - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT t_above[16], t_left[16]; + txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2, + &x_idx, &y_idx); - vpx_memcpy(&t_above, xd->plane[0].above_context, - sizeof(ENTROPY_CONTEXT) * 4 * bw); - vpx_memcpy(&t_left, xd->plane[0].left_context, - sizeof(ENTROPY_CONTEXT) * 4 * bh); - - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_Y_WITH_DC, - t_above + x_idx * 4, t_left + y_idx * 4, - TX_16X16, bw * bh * 16); + cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type, + t_above + x_idx, t_left + y_idx, + tx_size, bw * bh); } return cost; } -static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - MACROBLOCKD *const xd = &x->e_mbd; +static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { + int cost = 0, plane; - xd->mode_info_context->mbmi.txfm_size = TX_16X16; - vp9_transform_sby_16x16(x, bsize); - vp9_quantize_sby_16x16(x, bsize); - - *distortion = block_error_sby(x, 16 << (bwl + bhl), 2); - *rate = rdcost_sby_16x16(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize); -} - -static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - int cost = 0, b; - MACROBLOCKD * const xd = &x->e_mbd; - ENTROPY_CONTEXT t_above[16], t_left[16]; - - vpx_memcpy(&t_above, xd->plane[0].above_context, - sizeof(ENTROPY_CONTEXT) * 8 * bw); - vpx_memcpy(&t_left, xd->plane[0].left_context, - sizeof(ENTROPY_CONTEXT) * 8 * bh); - - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, b * 64, PLANE_TYPE_Y_WITH_DC, - t_above + x_idx * 8, t_left + y_idx * 8, - TX_32X32, bw * bh * 64); + for (plane = 1; plane < MAX_MB_PLANE; plane++) { + cost += rdcost_plane(cm, x, plane, bsize, tx_size); } - return cost; } -static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); +static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, + int *rate, int *distortion, int *skippable, + BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; + xd->mode_info_context->mbmi.txfm_size = tx_size; + vp9_xform_quant_sby(cm, x, bsize); - xd->mode_info_context->mbmi.txfm_size = TX_32X32; - vp9_transform_sby_32x32(x, bsize); - vp9_quantize_sby_32x32(x, bsize); - - *distortion = block_error_sby(x, 16 << (bwl + bhl), 0); - *rate = rdcost_sby_32x32(cm, x, bsize); + *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2); + *rate = rdcost_plane(cm, x, 0, bsize, tx_size); *skippable = vp9_sby_is_skippable(xd, bsize); } @@ -792,14 +688,19 @@ static void super_block_yrd(VP9_COMP *cpi, vp9_subtract_sby(x, bs); if (bs >= BLOCK_SIZE_SB32X32) - super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], - bs); - super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], bs); - super_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs); - super_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs); + super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], + bs, TX_32X32); + if (bs >= BLOCK_SIZE_MB16X16) + super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], + bs, TX_16X16); + super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs, + TX_8X8); + super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs, + TX_4X4); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, - TX_32X32 - (bs < BLOCK_SIZE_SB32X32)); + TX_32X32 - (bs < BLOCK_SIZE_SB32X32) + - (bs < BLOCK_SIZE_MB16X16)); } static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, @@ -816,17 +717,25 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, VP9_COMMON *const cm = &cpi->common; const int src_stride = x->plane[0].src.stride; uint8_t* const src = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, + raster_block_offset_uint8(xd, + BLOCK_SIZE_SB8X8, + 0, ib, x->plane[0].src.buf, src_stride); int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib, + raster_block_offset_int16(xd, + BLOCK_SIZE_SB8X8, + 0, ib, x->plane[0].src_diff); int16_t* const diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib, + raster_block_offset_int16(xd, + BLOCK_SIZE_SB8X8, + 0, ib, xd->plane[0].diff); int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16); uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, + raster_block_offset_uint8(xd, + BLOCK_SIZE_SB8X8, + 0, ib, xd->plane[0].dst.buf, xd->plane[0].dst.stride); ENTROPY_CONTEXT ta = *a, tempa = *a; ENTROPY_CONTEXT tl = *l, templ = *l; @@ -839,7 +748,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, * */ DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); - assert(ib < 16); + assert(ib < 4); #if CONFIG_NEWBINTRAMODES xd->mode_info_context->bmi[ib].as_mode.context = vp9_find_bpred_context(xd, ib, dst, xd->plane[0].dst.stride); @@ -867,25 +776,27 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, rate = bmode_costs[mode]; #endif - vp9_intra4x4_predict(xd, ib, mode, dst, xd->plane[0].dst.stride); - vp9_subtract_block(4, 4, src_diff, 16, + vp9_intra4x4_predict(xd, ib, + BLOCK_SIZE_SB8X8, + mode, dst, xd->plane[0].dst.stride); + vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, xd->plane[0].dst.stride); xd->mode_info_context->bmi[ib].as_mode.first = mode; tx_type = get_tx_type_4x4(xd, ib); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(src_diff, coeff, 16, tx_type); + vp9_short_fht4x4(src_diff, coeff, 8, tx_type); x->quantize_b_4x4(x, ib, tx_type, 16); } else { - x->fwd_txm4x4(src_diff, coeff, 32); + x->fwd_txm4x4(src_diff, coeff, 16); x->quantize_b_4x4(x, ib, tx_type, 16); } tempa = ta; templ = tl; - ratey = cost_coeffs(cm, x, ib, + ratey = cost_coeffs(cm, x, 0, ib, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16); rate += ratey; distortion = vp9_block_error(coeff, @@ -911,13 +822,15 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, // inverse transform if (best_tx_type != DCT_DCT) - vp9_short_iht4x4(best_dqcoeff, diff, 16, best_tx_type); + vp9_short_iht4x4(best_dqcoeff, diff, 8, best_tx_type); else - xd->inv_txm4x4(best_dqcoeff, diff, 32); + xd->inv_txm4x4(best_dqcoeff, diff, 16); - vp9_intra4x4_predict(xd, ib, *best_mode, + vp9_intra4x4_predict(xd, ib, + BLOCK_SIZE_SB8X8, + *best_mode, dst, xd->plane[0].dst.stride); - vp9_recon_b(dst, diff, + vp9_recon_b(dst, diff, 8, dst, xd->plane[0].dst.stride); return best_rd; @@ -932,7 +845,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, int distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; - ENTROPY_CONTEXT t_above[4], t_left[4]; + ENTROPY_CONTEXT t_above[2], t_left[2]; int *bmode_costs; vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); @@ -941,15 +854,17 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, xd->mode_info_context->mbmi.mode = I4X4_PRED; bmode_costs = mb->inter_bmode_costs; - for (i = 0; i < 16; i++) { - const int x_idx = i & 3, y_idx = i >> 2; + for (i = 0; i < 4; i++) { + const int x_idx = i & 1, y_idx = i >> 1; MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); #if CONFIG_NEWBINTRAMODES uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i, + raster_block_offset_uint8(xd, + BLOCK_SIZE_SB8X8, + 0, i, xd->plane[0].dst.buf, xd->plane[0].dst.stride); #endif @@ -1046,403 +961,16 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } -static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, - B_PREDICTION_MODE *best_mode, - int *mode_costs, - ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int *bestrate, int *bestratey, - int *bestdistortion) { - VP9_COMMON *const cm = &cpi->common; - MB_PREDICTION_MODE mode; - MACROBLOCKD *xd = &x->e_mbd; - int64_t best_rd = INT64_MAX; - int distortion = 0, rate = 0; - ENTROPY_CONTEXT ta[2], tl[2], ta_temp[2], tl_temp[2]; - // perform transformation of dimension 8x8 - // note the input and output index mapping - int idx = (ib & 0x02) ? (ib + 2) : ib; - const int src_stride = x->plane[0].src.stride; - uint8_t* const src = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, - x->plane[0].src.buf, src_stride); - int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib, - x->plane[0].src_diff); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16); - uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); - - assert(ib < 16); - vpx_memcpy(ta, a, sizeof(ta)); - vpx_memcpy(tl, l, sizeof(tl)); - for (mode = DC_PRED; mode <= TM_PRED; mode++) { - int64_t this_rd; - int rate_t = 0; - - // FIXME rate for compound mode and second intrapred mode - rate = mode_costs[mode]; - xd->mode_info_context->bmi[ib].as_mode.first = mode; - - vp9_intra8x8_predict(xd, ib, mode, dst, xd->plane[0].dst.stride); - - vp9_subtract_block(8, 8, src_diff, 16, - src, src_stride, - dst, xd->plane[0].dst.stride); - - vpx_memcpy(ta_temp, ta, sizeof(ta)); - vpx_memcpy(tl_temp, tl, sizeof(tl)); - - if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { - TX_TYPE tx_type = get_tx_type_8x8(xd, ib); - if (tx_type != DCT_DCT) - vp9_short_fht8x8(src_diff, coeff, 16, tx_type); - else - x->fwd_txm8x8(src_diff, coeff, 32); - x->quantize_b_8x8(x, idx, tx_type, 16); - - // compute quantization mse of 8x8 block - distortion = vp9_block_error_c(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); - - rate_t = cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, - ta_temp, tl_temp, TX_8X8, 16); - - rate += rate_t; - } else { - static const int iblock[4] = {0, 1, 4, 5}; - TX_TYPE tx_type; - int i; - - distortion = 0; - rate_t = 0; - for (i = 0; i < 4; ++i) { - int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, - 0, ib + iblock[i], - x->plane[0].src_diff); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, - ib + iblock[i], 16); - int do_two = 0; - tx_type = get_tx_type_4x4(xd, ib + iblock[i]); - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(src_diff, coeff, 16, tx_type); - x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16); - } else if (!(i & 1) && - get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) { - x->fwd_txm8x4(src_diff, coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1, 16); - do_two = 1; - } else { - x->fwd_txm4x4(src_diff, coeff, 32); - x->quantize_b_4x4(x, ib + iblock[i], tx_type, 16); - } - distortion += vp9_block_error_c(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16), - 16 << do_two); - rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, - &ta_temp[i & 1], &tl_temp[i >> 1], - TX_4X4, 16); - if (do_two) { - i++; - rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, - &ta_temp[i & 1], &tl_temp[i >> 1], - TX_4X4, 16); - } - } - rate += rate_t; - } - - distortion >>= 2; - this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); - if (this_rd < best_rd) { - *bestrate = rate; - *bestratey = rate_t; - *bestdistortion = distortion; - vpx_memcpy(a, ta_temp, sizeof(ta_temp)); - vpx_memcpy(l, tl_temp, sizeof(tl_temp)); - best_rd = this_rd; - *best_mode = mode; - } - } - xd->mode_info_context->bmi[ib].as_mode.first = (*best_mode); - vp9_encode_intra8x8(x, ib); - - return best_rd; -} - -static int64_t rd_pick_intra8x8mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, - int *Rate, int *rate_y, - int *Distortion, int64_t best_rd) { - MACROBLOCKD *const xd = &mb->e_mbd; - int i, ib; - int cost = mb->mbmode_cost [xd->frame_type] [I8X8_PRED]; - int distortion = 0; - int tot_rate_y = 0; - int64_t total_rd = 0; - ENTROPY_CONTEXT t_above[4], t_left[4]; - int *i8x8mode_costs; - - vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above)); - vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left)); - - xd->mode_info_context->mbmi.mode = I8X8_PRED; - i8x8mode_costs = mb->i8x8_mode_costs; - - for (i = 0; i < 4; i++) { - const int x_idx = i & 1, y_idx = i >> 1; - MODE_INFO *const mic = xd->mode_info_context; - B_PREDICTION_MODE UNINITIALIZED_IS_SAFE(best_mode); - int UNINITIALIZED_IS_SAFE(r), UNINITIALIZED_IS_SAFE(ry), UNINITIALIZED_IS_SAFE(d); - - ib = vp9_i8x8_block[i]; - total_rd += rd_pick_intra8x8block(cpi, mb, ib, &best_mode, i8x8mode_costs, - t_above + x_idx * 2, t_left + y_idx * 2, - &r, &ry, &d); - cost += r; - distortion += d; - tot_rate_y += ry; - mic->bmi[ib].as_mode.first = best_mode; - } - - *Rate = cost; - *rate_y = tot_rate_y; - *Distortion = distortion; - return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); -} - -static int64_t rd_pick_intra8x8mby_modes_and_txsz(VP9_COMP *cpi, MACROBLOCK *x, - int *rate, int *rate_y, - int *distortion, - int *mode8x8, - int64_t best_yrd, - int64_t *txfm_cache) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; - int cost0 = vp9_cost_bit(cm->prob_tx[0], 0); - int cost1 = vp9_cost_bit(cm->prob_tx[0], 1); - int64_t tmp_rd_4x4s, tmp_rd_8x8s; - int64_t tmp_rd_4x4, tmp_rd_8x8, tmp_rd; - int r4x4, tok4x4, d4x4, r8x8, tok8x8, d8x8; - - mbmi->txfm_size = TX_4X4; - tmp_rd_4x4 = rd_pick_intra8x8mby_modes(cpi, x, &r4x4, &tok4x4, - &d4x4, best_yrd); - mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first; - mbmi->txfm_size = TX_8X8; - tmp_rd_8x8 = rd_pick_intra8x8mby_modes(cpi, x, &r8x8, &tok8x8, - &d8x8, best_yrd); - txfm_cache[ONLY_4X4] = tmp_rd_4x4; - txfm_cache[ALLOW_8X8] = tmp_rd_8x8; - txfm_cache[ALLOW_16X16] = tmp_rd_8x8; - tmp_rd_4x4s = tmp_rd_4x4 + RDCOST(x->rdmult, x->rddiv, cost0, 0); - tmp_rd_8x8s = tmp_rd_8x8 + RDCOST(x->rdmult, x->rddiv, cost1, 0); - txfm_cache[TX_MODE_SELECT] = tmp_rd_4x4s < tmp_rd_8x8s ? - tmp_rd_4x4s : tmp_rd_8x8s; - if (cm->txfm_mode == TX_MODE_SELECT) { - if (tmp_rd_4x4s < tmp_rd_8x8s) { - *rate = r4x4 + cost0; - *rate_y = tok4x4 + cost0; - *distortion = d4x4; - mbmi->txfm_size = TX_4X4; - tmp_rd = tmp_rd_4x4s; - } else { - *rate = r8x8 + cost1; - *rate_y = tok8x8 + cost1; - *distortion = d8x8; - mbmi->txfm_size = TX_8X8; - tmp_rd = tmp_rd_8x8s; - - mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first; - } - } else if (cm->txfm_mode == ONLY_4X4) { - *rate = r4x4; - *rate_y = tok4x4; - *distortion = d4x4; - mbmi->txfm_size = TX_4X4; - tmp_rd = tmp_rd_4x4; - } else { - *rate = r8x8; - *rate_y = tok8x8; - *distortion = d8x8; - mbmi->txfm_size = TX_8X8; - tmp_rd = tmp_rd_8x8; - - mode8x8[0] = xd->mode_info_context->bmi[0].as_mode.first; - mode8x8[1] = xd->mode_info_context->bmi[2].as_mode.first; - mode8x8[2] = xd->mode_info_context->bmi[8].as_mode.first; - mode8x8[3] = xd->mode_info_context->bmi[10].as_mode.first; - } - - return tmp_rd; -} - -static int rd_cost_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - int yoff = 4 * bw * bh; - int p, b, cost = 0; - MACROBLOCKD *const xd = &x->e_mbd; - - for (p = 1; p < MAX_MB_PLANE; p++) { - ENTROPY_CONTEXT t_above[8], t_left[8]; - - vpx_memcpy(t_above, xd->plane[p].above_context, - sizeof(ENTROPY_CONTEXT) * 2 * bw >> xd->plane[p].subsampling_x); - vpx_memcpy(t_left, xd->plane[p].left_context, - sizeof(ENTROPY_CONTEXT) * 2 * bh >> xd->plane[p].subsampling_y); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, yoff + b, PLANE_TYPE_UV, - t_above + x_idx, t_left + y_idx, - TX_4X4, bw * bh * 4); - } - yoff = (yoff * 5) >> 2; // u -> v - } - - return cost; -} - -static void super_block_uvrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - - vp9_transform_sbuv_4x4(x, bsize); - vp9_quantize_sbuv_4x4(x, bsize); - - *rate = rd_cost_sbuv_4x4(cm, x, bsize); - *distortion = block_error_sbuv(x, bsize, 2); - *skip = vp9_sbuv_is_skippable(xd, bsize); -} - -static int rd_cost_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - int yoff = 16 * bw * bh; - int p, b, cost = 0; - MACROBLOCKD *const xd = &x->e_mbd; - - for (p = 1; p < MAX_MB_PLANE; p++) { - ENTROPY_CONTEXT t_above[8], t_left[8]; - - vpx_memcpy(t_above, xd->plane[p].above_context, - sizeof(ENTROPY_CONTEXT) * 4 * bw >> xd->plane[p].subsampling_x); - vpx_memcpy(t_left, xd->plane[p].left_context, - sizeof(ENTROPY_CONTEXT) * 4 * bh >> xd->plane[p].subsampling_y); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, yoff + b * 4, PLANE_TYPE_UV, - t_above + x_idx * 2, t_left + y_idx * 2, - TX_8X8, bw * bh * 16); - } - yoff = (yoff * 5) >> 2; // u -> v - } - - return cost; -} - -static void super_block_uvrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - BLOCK_SIZE_TYPE bsize) { +static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, + int *rate, int *distortion, + int *skippable, BLOCK_SIZE_TYPE bsize, + TX_SIZE uv_tx_size) { MACROBLOCKD *const xd = &x->e_mbd; + vp9_xform_quant_sbuv(cm, x, bsize); - vp9_transform_sbuv_8x8(x, bsize); - vp9_quantize_sbuv_8x8(x, bsize); - - *rate = rd_cost_sbuv_8x8(cm, x, bsize); - *distortion = block_error_sbuv(x, bsize, 2); - *skip = vp9_sbuv_is_skippable(xd, bsize); -} - -static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - int yoff = 64 * bw * bh; - int p, b, cost = 0; - MACROBLOCKD *const xd = &x->e_mbd; - - for (p = 1; p < MAX_MB_PLANE; p++) { - ENTROPY_CONTEXT t_above[8], t_left[8]; - - vpx_memcpy(t_above, xd->plane[p].above_context, - sizeof(ENTROPY_CONTEXT) * 8 * bw >> xd->plane[p].subsampling_x); - vpx_memcpy(t_left, xd->plane[p].left_context, - sizeof(ENTROPY_CONTEXT) * 8 * bh >> xd->plane[p].subsampling_y); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, yoff + b * 16, PLANE_TYPE_UV, - t_above + x_idx * 4, t_left + y_idx * 4, - TX_16X16, bw * bh * 64); - } - yoff = (yoff * 5) >> 2; // u -> v - } - - return cost; -} - -static void super_block_uvrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - - vp9_transform_sbuv_16x16(x, bsize); - vp9_quantize_sbuv_16x16(x, bsize); - - *rate = rd_cost_sbuv_16x16(cm, x, bsize); - *distortion = block_error_sbuv(x, bsize, 2); - *skip = vp9_sbuv_is_skippable(xd, bsize); -} - -static int rd_cost_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 4, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 4); - int yoff = 256 * bh * bw; - int p, b, cost = 0; - MACROBLOCKD *const xd = &x->e_mbd; - - for (p = 1; p < MAX_MB_PLANE; p++) { - ENTROPY_CONTEXT t_above[8], t_left[8]; - - vpx_memcpy(t_above, xd->plane[p].above_context, - sizeof(ENTROPY_CONTEXT) * 16 * bw >> xd->plane[p].subsampling_x); - vpx_memcpy(t_left, xd->plane[p].left_context, - sizeof(ENTROPY_CONTEXT) * 16 * bh >> xd->plane[p].subsampling_y); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b * (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, yoff + b * 64, PLANE_TYPE_UV, - t_above + x_idx * 8, t_left + y_idx * 8, - TX_32X32, 256 * bh * bw); - } - yoff = (yoff * 5) >> 2; // u -> v - } - - return cost; -} -#undef UVCTX - -static void super_block_uvrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - - vp9_transform_sbuv_32x32(x, bsize); - vp9_quantize_sbuv_32x32(x, bsize); - - *rate = rd_cost_sbuv_32x32(cm, x, bsize); - *distortion = block_error_sbuv(x, bsize, 0); - *skip = vp9_sbuv_is_skippable(xd, bsize); + *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2); + *rate = rdcost_uv(cm, x, bsize, uv_tx_size); + *skippable = vp9_sbuv_is_skippable(xd, bsize); } static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, @@ -1454,14 +982,17 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, vp9_subtract_sbuv(x, bsize); if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) { - super_block_uvrd_32x32(cm, x, rate, distortion, skippable, bsize); + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_32X32); } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) { - super_block_uvrd_16x16(cm, x, rate, distortion, skippable, bsize); - } else if (mbmi->txfm_size >= TX_8X8) { - super_block_uvrd_8x8(cm, x, rate, distortion, skippable, bsize); + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_16X16); + } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) { + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_8X8); } else { - assert(mbmi->txfm_size == TX_4X4); - super_block_uvrd_4x4(cm, x, rate, distortion, skippable, bsize); + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_4X4); } } @@ -1524,28 +1055,25 @@ void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { x->e_mbd.mode_info_context->mbmi.mv[0].as_int = mv->as_int; } -static int labels2mode( - MACROBLOCK *x, - int const *labelings, int which_label, - B_PREDICTION_MODE this_mode, - int_mv *this_mv, int_mv *this_second_mv, - int_mv seg_mvs[MAX_REF_FRAMES - 1], - int_mv *best_ref_mv, - int_mv *second_best_ref_mv, - int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { +static int labels2mode(MACROBLOCK *x, + int const *labelings, int which_label, + B_PREDICTION_MODE this_mode, + int_mv *this_mv, int_mv *this_second_mv, + int_mv seg_mvs[MAX_REF_FRAMES - 1], + int_mv *best_ref_mv, + int_mv *second_best_ref_mv, + int *mvjcost, int *mvcost[2], VP9_COMP *cpi) { MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mode_info_context; MB_MODE_INFO * mbmi = &mic->mbmi; const int mis = xd->mode_info_stride; - int i, cost = 0, thismvcost = 0; /* We have to be careful retrieving previously-encoded motion vectors. - Ones from this macroblock have to be pulled from the BLOCKD array - as they have not yet made it to the bmi array in our MB_MODE_INFO. */ - for (i = 0; i < 16; ++i) { - const int row = i >> 2, col = i & 3; - + Ones from this macroblock have to be pulled from the BLOCKD array + as they have not yet made it to the bmi array in our MB_MODE_INFO. */ + for (i = 0; i < 4; ++i) { + const int row = i >> 1, col = i & 1; B_PREDICTION_MODE m; if (labelings[i] != which_label) @@ -1553,7 +1081,7 @@ static int labels2mode( if (col && labelings[i] == labelings[i - 1]) m = LEFT4X4; - else if (row && labelings[i] == labelings[i - 4]) + else if (row && labelings[i] == labelings[i - 2]) m = ABOVE4X4; else { // the only time we should do costing for new motion vector or mode @@ -1563,7 +1091,7 @@ static int labels2mode( if (mbmi->second_ref_frame > 0) { this_mv->as_int = seg_mvs[mbmi->ref_frame - 1].as_int; this_second_mv->as_int = - seg_mvs[mbmi->second_ref_frame - 1].as_int; + seg_mvs[mbmi->second_ref_frame - 1].as_int; } thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, @@ -1576,17 +1104,17 @@ static int labels2mode( break; case LEFT4X4: this_mv->as_int = col ? mic->bmi[i - 1].as_mv[0].as_int : - left_block_mv(xd, mic, i); + left_block_mv(xd, mic, i); if (mbmi->second_ref_frame > 0) this_second_mv->as_int = col ? mic->bmi[i - 1].as_mv[1].as_int : - left_block_second_mv(xd, mic, i); + left_block_second_mv(xd, mic, i); break; case ABOVE4X4: - this_mv->as_int = row ? mic->bmi[i - 4].as_mv[0].as_int : - above_block_mv(mic, i, mis); + this_mv->as_int = row ? mic->bmi[i - 2].as_mv[0].as_int : + above_block_mv(mic, i, mis); if (mbmi->second_ref_frame > 0) - this_second_mv->as_int = row ? mic->bmi[i - 4].as_mv[1].as_int : - above_block_second_mv(mic, i, mis); + this_second_mv->as_int = row ? mic->bmi[i - 2].as_mv[1].as_int : + above_block_second_mv(mic, i, mis); break; case ZERO4X4: this_mv->as_int = 0; @@ -1597,15 +1125,15 @@ static int labels2mode( break; } - if (m == ABOVE4X4) { // replace above with left if same + if (m == ABOVE4X4) { // replace above with left if same int_mv left_mv, left_second_mv; left_second_mv.as_int = 0; left_mv.as_int = col ? mic->bmi[i - 1].as_mv[0].as_int : - left_block_mv(xd, mic, i); + left_block_mv(xd, mic, i); if (mbmi->second_ref_frame > 0) left_second_mv.as_int = col ? mic->bmi[i - 1].as_mv[1].as_int : - left_block_second_mv(xd, mic, i); + left_block_second_mv(xd, mic, i); if (left_mv.as_int == this_mv->as_int && (mbmi->second_ref_frame <= 0 || @@ -1614,8 +1142,8 @@ static int labels2mode( } #if CONFIG_NEWBINTRAMODES - cost = x->inter_bmode_costs[ - m == B_CONTEXT_PRED ? m - CONTEXT_PRED_REPLACEMENTS : m]; + cost = x->inter_bmode_costs[m == B_CONTEXT_PRED ? + m - CONTEXT_PRED_REPLACEMENTS : m]; #else cost = x->inter_bmode_costs[m]; #endif @@ -1648,24 +1176,24 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, *labelyrate = 0; *distortion = 0; - for (i = 0; i < 16; i++) { + for (i = 0; i < 4; i++) { if (labels[i] == which_label) { const int src_stride = x->plane[0].src.stride; uint8_t* const src = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i, - x->plane[0].src.buf, src_stride); + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src.buf, src_stride); int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, i, - x->plane[0].src_diff); + raster_block_offset_int16(xd, BLOCK_SIZE_SB8X8, 0, i, + x->plane[0].src_diff); int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, 16, i); uint8_t* const pre = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i, - xd->plane[0].pre[0].buf, - xd->plane[0].pre[0].stride); + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].pre[0].buf, + xd->plane[0].pre[0].stride); uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].dst.buf, + xd->plane[0].dst.stride); int thisdistortion; vp9_build_inter_predictor(pre, @@ -1681,210 +1209,48 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, // weighting for splitmv modes is turned on. if (xd->mode_info_context->mbmi.second_ref_frame > 0) { uint8_t* const second_pre = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, i, - xd->plane[0].pre[1].buf, - xd->plane[0].pre[1].stride); - vp9_build_inter_predictor( - second_pre, xd->plane[0].pre[1].stride, - dst, xd->plane[0].dst.stride, - &xd->mode_info_context->bmi[i].as_mv[1], - &xd->scale_factor[1], 4, 4, 1, - &xd->subpix); + raster_block_offset_uint8(xd, BLOCK_SIZE_SB8X8, 0, i, + xd->plane[0].pre[1].buf, + xd->plane[0].pre[1].stride); + vp9_build_inter_predictor(second_pre, xd->plane[0].pre[1].stride, + dst, xd->plane[0].dst.stride, + &xd->mode_info_context->bmi[i].as_mv[1], + &xd->scale_factor[1], 4, 4, 1, + &xd->subpix); } - vp9_subtract_block(4, 4, src_diff, 16, + vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, xd->plane[0].dst.stride); - x->fwd_txm4x4(src_diff, coeff, 32); + x->fwd_txm4x4(src_diff, coeff, 16); x->quantize_b_4x4(x, i, DCT_DCT, 16); thisdistortion = vp9_block_error(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16); + BLOCK_OFFSET(xd->plane[0].dqcoeff, + i, 16), 16); *distortion += thisdistortion; - *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC, - ta + (i & 3), - tl + (i >> 2), TX_4X4, 16); + *labelyrate += cost_coeffs(cm, x, 0, i, PLANE_TYPE_Y_WITH_DC, + ta + (i & 1), + tl + (i >> 1), TX_4X4, 16); } } *distortion >>= 2; return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); } -static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, - MACROBLOCK *x, - int const *labels, - int which_label, - int *labelyrate, - int *distortion, - int64_t *otherrd, - ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl) { - int i, j; - MACROBLOCKD *xd = &x->e_mbd; - const int iblock[4] = { 0, 1, 4, 5 }; - int othercost = 0, otherdist = 0; - ENTROPY_CONTEXT tac[4], tlc[4]; - - if (otherrd) { - memcpy(&tac, ta, sizeof(tac)); - memcpy(&tlc, tl, sizeof(tlc)); - } - - *distortion = 0; - *labelyrate = 0; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - - if (labels[ib] == which_label) { - const int use_second_ref = - xd->mode_info_context->mbmi.second_ref_frame > 0; - int which_mv; - const int idx = (ib & 8) + ((ib & 2) << 1); - const int src_stride = x->plane[0].src.stride; - uint8_t* const src = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, - x->plane[0].src.buf, src_stride); - int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, 0, ib, - x->plane[0].src_diff); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, idx, 16); - int thisdistortion; - uint8_t* const dst = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride); - - assert(idx < 16); - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - uint8_t* const pre = - raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib, - xd->plane[0].pre[which_mv].buf, - xd->plane[0].pre[which_mv].stride); - - // TODO(debargha): Make this work properly with the - // implicit-compoundinter-weight experiment when implicit - // weighting for splitmv modes is turned on. - vp9_build_inter_predictor( - pre, xd->plane[0].pre[which_mv].stride, - dst, xd->plane[0].dst.stride, - &xd->mode_info_context->bmi[ib].as_mv[which_mv], - &xd->scale_factor[which_mv], 8, 8, - which_mv, &xd->subpix); - } - - vp9_subtract_block(8, 8, src_diff, 16, - src, src_stride, - dst, xd->plane[0].dst.stride); - - if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) { - if (otherrd) { - x->fwd_txm8x8(src_diff, coeff, 32); - x->quantize_b_8x8(x, idx, DCT_DCT, 16); - thisdistortion = vp9_block_error_c(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); - otherdist += thisdistortion; - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, - tac + (i & 1) * 2, - tlc + (i & 2), - TX_8X8, 16); - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - } - for (j = 0; j < 4; j += 2) { - int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, - 0, ib + iblock[j], - x->plane[0].src_diff); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, - ib + iblock[j], 16); - x->fwd_txm8x4(src_diff, coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16); - thisdistortion = vp9_block_error_c(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32); - *distortion += thisdistortion; - *labelyrate += - cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, - ta + (i & 1) * 2, - tl + (i & 2) + ((j & 2) >> 1), - TX_4X4, 16); - *labelyrate += - cost_coeffs(cm, x, ib + iblock[j] + 1, - PLANE_TYPE_Y_WITH_DC, - ta + (i & 1) * 2 + 1, - tl + (i & 2) + ((j & 2) >> 1), - TX_4X4, 16); - } - } else /* 8x8 */ { - if (otherrd) { - for (j = 0; j < 4; j += 2) { - int16_t* const src_diff = - raster_block_offset_int16(xd, BLOCK_SIZE_MB16X16, - 0, ib + iblock[j], - x->plane[0].src_diff); - int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, - ib + iblock[j], 16); - x->fwd_txm8x4(src_diff, coeff, 32); - x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1, 16); - thisdistortion = vp9_block_error_c(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32); - otherdist += thisdistortion; - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - othercost += - cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, - tac + (i & 1) * 2, - tlc + (i & 2) + ((j & 2) >> 1), - TX_4X4, 16); - othercost += - cost_coeffs(cm, x, ib + iblock[j] + 1, - PLANE_TYPE_Y_WITH_DC, - tac + (i & 1) * 2 + 1, - tlc + (i & 2) + ((j & 2) >> 1), - TX_4X4, 16); - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - } - } - x->fwd_txm8x8(src_diff, coeff, 32); - x->quantize_b_8x8(x, idx, DCT_DCT, 16); - thisdistortion = vp9_block_error_c(coeff, - BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); - *distortion += thisdistortion; - *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, - ta + (i & 1) * 2, - tl + (i & 2), - TX_8X8, 16); - } - } - } - *distortion >>= 2; - if (otherrd) { - otherdist >>= 2; - *otherrd = RDCOST(x->rdmult, x->rddiv, othercost, otherdist); - } - return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); -} - -static const unsigned int segmentation_to_sseshift[4] = {3, 3, 2, 0}; - - typedef struct { int_mv *ref_mv, *second_ref_mv; int_mv mvp; int64_t segment_rd; - SPLITMV_PARTITIONING_TYPE segment_num; - TX_SIZE txfm_size; int r; int d; int segment_yrate; - B_PREDICTION_MODE modes[16]; - int_mv mvs[16], second_mvs[16]; - int eobs[16]; + B_PREDICTION_MODE modes[4]; + int_mv mvs[4], second_mvs[4]; + int eobs[4]; int mvthresh; int *mdcounts; - - int_mv sv_mvp[4]; // save 4 mvp from 8x8 - int sv_istep[2]; // save 2 initial step_param for 16x8/8x16 - } BEST_SEG_INFO; static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { @@ -1898,37 +1264,29 @@ static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) { static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, - SPLITMV_PARTITIONING_TYPE segmentation, - TX_SIZE tx_size, int64_t *otherrds, - int64_t *rds, int *completed, - /* 16 = n_blocks */ - int_mv seg_mvs[16 /* n_blocks */] - [MAX_REF_FRAMES - 1]) { + int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { int i, j; - int const *labels; + static const int labels[4] = { 0, 1, 2, 3 }; int br = 0, bd = 0; B_PREDICTION_MODE this_mode; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - - int label_count; + const int label_count = 4; int64_t this_segment_rd = 0, other_segment_rd; int label_mv_thresh; int rate = 0; int sbr = 0, sbd = 0; int segmentyrate = 0; - int best_eobs[16] = { 0 }; + int best_eobs[4] = { 0 }; vp9_variance_fn_ptr_t *v_fn_ptr; - ENTROPY_CONTEXT t_above[4], t_left[4]; - ENTROPY_CONTEXT t_above_b[4], t_left_b[4]; + ENTROPY_CONTEXT t_above[2], t_left[2]; + ENTROPY_CONTEXT t_above_b[2], t_left_b[2]; vpx_memcpy(t_above, x->e_mbd.plane[0].above_context, sizeof(t_above)); vpx_memcpy(t_left, x->e_mbd.plane[0].left_context, sizeof(t_left)); - v_fn_ptr = &cpi->fn_ptr[segmentation]; - labels = vp9_mbsplits[segmentation]; - label_count = vp9_mbsplit_count[segmentation]; + v_fn_ptr = &cpi->fn_ptr[BLOCK_4X4]; // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on @@ -1937,15 +1295,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, label_mv_thresh = 1 * bsi->mvthresh / label_count; // Segmentation method overheads - rate = cost_token(vp9_mbsplit_tree, vp9_mbsplit_probs, - vp9_mbsplit_encodings + segmentation); rate += vp9_cost_mv_ref(cpi, SPLITMV, mbmi->mb_mode_context[mbmi->ref_frame]); this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); br += rate; other_segment_rd = this_segment_rd; - mbmi->txfm_size = tx_size; for (i = 0; i < label_count && this_segment_rd < bsi->segment_rd; i++) { int_mv mode_mv[B_MODE_COUNT], second_mode_mv[B_MODE_COUNT]; int64_t best_label_rd = INT64_MAX, best_other_rd = INT64_MAX; @@ -1954,10 +1309,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // search for the best motion vector on this segment for (this_mode = LEFT4X4; this_mode <= NEW4X4; this_mode ++) { - int64_t this_rd, other_rd; + int64_t this_rd; int distortion; int labelyrate; - ENTROPY_CONTEXT t_above_s[4], t_left_s[4]; + ENTROPY_CONTEXT t_above_s[2], t_left_s[2]; vpx_memcpy(t_above_s, t_above, sizeof(t_above_s)); vpx_memcpy(t_left_s, t_left, sizeof(t_left_s)); @@ -1977,22 +1332,13 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, break; if (cpi->compressor_speed) { - if (segmentation == PARTITIONING_8X16 || - segmentation == PARTITIONING_16X8) { - bsi->mvp.as_int = bsi->sv_mvp[i].as_int; - if (i == 1 && segmentation == PARTITIONING_16X8) - bsi->mvp.as_int = bsi->sv_mvp[2].as_int; - - step_param = bsi->sv_istep[i]; - } - // use previous block's result as next block's MV predictor. - if (segmentation == PARTITIONING_4X4 && i > 0) { + if (i > 0) { bsi->mvp.as_int = - x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; - if (i == 4 || i == 8 || i == 12) + x->e_mbd.mode_info_context->bmi[i - 1].as_mv[0].as_int; + if (i == 2) bsi->mvp.as_int = - x->e_mbd.mode_info_context->bmi[i - 4].as_mv[0].as_int; + x->e_mbd.mode_info_context->bmi[i - 2].as_mv[0].as_int; step_param = 2; } } @@ -2007,24 +1353,24 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; // find first label - n = vp9_mbsplit_offset[segmentation][i]; + n = i; // adjust src pointer for this segment x->plane[0].src.buf = - raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_MB16X16, 0, n, - x->plane[0].src.buf, - x->plane[0].src.stride); - assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0xf) == 0); + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, + x->plane[0].src.buf, + x->plane[0].src.stride); + assert(((intptr_t)x->e_mbd.plane[0].pre[0].buf & 0x7) == 0); x->e_mbd.plane[0].pre[0].buf = - raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_MB16X16, 0, n, - x->e_mbd.plane[0].pre[0].buf, - x->e_mbd.plane[0].pre[0].stride); + raster_block_offset_uint8(&x->e_mbd, BLOCK_SIZE_SB8X8, 0, n, + x->e_mbd.plane[0].pre[0].buf, + x->e_mbd.plane[0].pre[0].stride); bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 0, v_fn_ptr, bsi->ref_mv, &mode_mv[NEW4X4]); - sseshift = segmentation_to_sseshift[segmentation]; + sseshift = 0; // Should we do a full search (best quality only) if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { @@ -2041,12 +1387,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (thissme < bestsme) { bestsme = thissme; mode_mv[NEW4X4].as_int = - x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int; + x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int; } else { /* The full search result is actually worse so re-instate the * previous best vector */ x->e_mbd.mode_info_context->bmi[n].as_mv[0].as_int = - mode_mv[NEW4X4].as_int; + mode_mv[NEW4X4].as_int; } } } @@ -2092,17 +1438,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, mv_check_bounds(x, &second_mode_mv[this_mode])) continue; - if (segmentation == PARTITIONING_4X4) { - this_rd = encode_inter_mb_segment(&cpi->common, - x, labels, i, &labelyrate, - &distortion, t_above_s, t_left_s); - other_rd = this_rd; - } else { - this_rd = encode_inter_mb_segment_8x8(&cpi->common, - x, labels, i, &labelyrate, - &distortion, &other_rd, - t_above_s, t_left_s); - } + this_rd = encode_inter_mb_segment(&cpi->common, + x, labels, i, &labelyrate, + &distortion, t_above_s, t_left_s); this_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); rate += labelyrate; @@ -2112,24 +1450,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, bestlabelyrate = labelyrate; mode_selected = this_mode; best_label_rd = this_rd; - if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_4X4) { - for (j = 0; j < 16; j++) - if (labels[j] == i) - best_eobs[j] = x->e_mbd.plane[0].eobs[j]; - } else { - for (j = 0; j < 4; j++) { - int ib = vp9_i8x8_block[j], idx = j * 4; - - if (labels[ib] == i) - best_eobs[idx] = x->e_mbd.plane[0].eobs[idx]; - } - } - if (other_rd < best_other_rd) - best_other_rd = other_rd; + for (j = 0; j < 4; j++) + if (labels[j] == i) + best_eobs[j] = x->e_mbd.plane[0].eobs[j]; vpx_memcpy(t_above_b, t_above_s, sizeof(t_above_s)); vpx_memcpy(t_left_b, t_left_s, sizeof(t_left_s)); - } } /*for each 4x4 mode*/ @@ -2146,10 +1472,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, segmentyrate += bestlabelyrate; this_segment_rd += best_label_rd; other_segment_rd += best_other_rd; - if (rds) - rds[i] = this_segment_rd; - if (otherrds) - otherrds[i] = other_segment_rd; } /* for each label */ if (this_segment_rd < bsi->segment_rd) { @@ -2157,11 +1479,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, bsi->d = bd; bsi->segment_yrate = segmentyrate; bsi->segment_rd = this_segment_rd; - bsi->segment_num = segmentation; - bsi->txfm_size = mbmi->txfm_size; // store everything needed to come back to this!! - for (i = 0; i < 16; i++) { + for (i = 0; i < 4; i++) { bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; if (mbmi->second_ref_frame > 0) bsi->second_mvs[i].as_mv = x->partition_info->bmi[i].second_mv.as_mv; @@ -2169,118 +1489,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, bsi->eobs[i] = best_eobs[i]; } } - - if (completed) { - *completed = i; - } } static void rd_check_segment(VP9_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, - unsigned int segmentation, - /* 16 = n_blocks */ - int_mv seg_mvs[16][MAX_REF_FRAMES - 1], - int64_t txfm_cache[NB_TXFM_MODES]) { - int i, n, c = vp9_mbsplit_count[segmentation]; - - if (segmentation == PARTITIONING_4X4) { - int64_t rd[16]; - - rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, NULL, - rd, &n, seg_mvs); - if (n == c) { - for (i = 0; i < NB_TXFM_MODES; i++) { - if (rd[c - 1] < txfm_cache[i]) - txfm_cache[i] = rd[c - 1]; - } - } - } else { - int64_t diff, base_rd; - int cost4x4 = vp9_cost_bit(cpi->common.prob_tx[0], 0); - int cost8x8 = vp9_cost_bit(cpi->common.prob_tx[0], 1); - - if (cpi->common.txfm_mode == TX_MODE_SELECT) { - int64_t rd4x4[4], rd8x8[4]; - int n4x4, n8x8, nmin; - BEST_SEG_INFO bsi4x4, bsi8x8; - - /* factor in cost of cost4x4/8x8 in decision */ - vpx_memcpy(&bsi4x4, bsi, sizeof(*bsi)); - vpx_memcpy(&bsi8x8, bsi, sizeof(*bsi)); - rd_check_segment_txsize(cpi, x, &bsi4x4, segmentation, - TX_4X4, NULL, rd4x4, &n4x4, seg_mvs); - rd_check_segment_txsize(cpi, x, &bsi8x8, segmentation, - TX_8X8, NULL, rd8x8, &n8x8, seg_mvs); - if (bsi4x4.segment_num == segmentation) { - bsi4x4.segment_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0); - if (bsi4x4.segment_rd < bsi->segment_rd) - vpx_memcpy(bsi, &bsi4x4, sizeof(*bsi)); - } - if (bsi8x8.segment_num == segmentation) { - bsi8x8.segment_rd += RDCOST(x->rdmult, x->rddiv, cost8x8, 0); - if (bsi8x8.segment_rd < bsi->segment_rd) - vpx_memcpy(bsi, &bsi8x8, sizeof(*bsi)); - } - n = n4x4 > n8x8 ? n4x4 : n8x8; - if (n == c) { - nmin = n4x4 < n8x8 ? n4x4 : n8x8; - diff = rd8x8[nmin - 1] - rd4x4[nmin - 1]; - if (n == n4x4) { - base_rd = rd4x4[c - 1]; - } else { - base_rd = rd8x8[c - 1] - diff; - } - } - } else { - int64_t rd[4], otherrd[4]; - - if (cpi->common.txfm_mode == ONLY_4X4) { - rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_4X4, otherrd, - rd, &n, seg_mvs); - if (n == c) { - base_rd = rd[c - 1]; - diff = otherrd[c - 1] - rd[c - 1]; - } - } else /* use 8x8 transform */ { - rd_check_segment_txsize(cpi, x, bsi, segmentation, TX_8X8, otherrd, - rd, &n, seg_mvs); - if (n == c) { - diff = rd[c - 1] - otherrd[c - 1]; - base_rd = otherrd[c - 1]; - } - } - } - - if (n == c) { - if (base_rd < txfm_cache[ONLY_4X4]) { - txfm_cache[ONLY_4X4] = base_rd; - } - if (base_rd + diff < txfm_cache[ALLOW_8X8]) { - txfm_cache[ALLOW_8X8] = txfm_cache[ALLOW_16X16] = - txfm_cache[ALLOW_32X32] = base_rd + diff; - } - if (diff < 0) { - base_rd += diff + RDCOST(x->rdmult, x->rddiv, cost8x8, 0); - } else { - base_rd += RDCOST(x->rdmult, x->rddiv, cost4x4, 0); - } - if (base_rd < txfm_cache[TX_MODE_SELECT]) { - txfm_cache[TX_MODE_SELECT] = base_rd; - } - } - } -} - -static INLINE void cal_step_param(int sr, int *sp) { - int step = 0; - - if (sr > MAX_FIRST_STEP) sr = MAX_FIRST_STEP; - else if (sr < 1) sr = 1; - - while (sr >>= 1) - step++; - - *sp = MAX_MVSEARCH_STEPS - 1 - step; + int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { + rd_check_segment_txsize(cpi, x, bsi, seg_mvs); } static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, @@ -2292,17 +1506,12 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, int *returnyrate, int *returndistortion, int *skippable, int mvthresh, - int_mv seg_mvs[NB_PARTITIONINGS] - [16 /* n_blocks */] - [MAX_REF_FRAMES - 1], - int64_t txfm_cache[NB_TXFM_MODES]) { + int_mv seg_mvs[4][MAX_REF_FRAMES - 1]) { int i; BEST_SEG_INFO bsi; MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; vpx_memset(&bsi, 0, sizeof(bsi)); - for (i = 0; i < NB_TXFM_MODES; i++) - txfm_cache[i] = INT64_MAX; bsi.segment_rd = best_rd; bsi.ref_mv = best_ref_mv; @@ -2310,121 +1519,41 @@ static int rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x, bsi.mvp.as_int = best_ref_mv->as_int; bsi.mvthresh = mvthresh; bsi.mdcounts = mdcounts; - bsi.txfm_size = TX_4X4; - for (i = 0; i < 16; i++) + for (i = 0; i < 4; i++) bsi.modes[i] = ZERO4X4; - if (cpi->compressor_speed == 0) { - /* for now, we will keep the original segmentation order - when in best quality mode */ - rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8, - seg_mvs[PARTITIONING_16X8], txfm_cache); - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16, - seg_mvs[PARTITIONING_8X16], txfm_cache); - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8, - seg_mvs[PARTITIONING_8X8], txfm_cache); - rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4, - seg_mvs[PARTITIONING_4X4], txfm_cache); - } else { - int sr; - - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X8, - seg_mvs[PARTITIONING_8X8], txfm_cache); - - if (bsi.segment_rd < best_rd) { - int tmp_col_min = x->mv_col_min; - int tmp_col_max = x->mv_col_max; - int tmp_row_min = x->mv_row_min; - int tmp_row_max = x->mv_row_max; - - vp9_clamp_mv_min_max(x, best_ref_mv); - - /* Get 8x8 result */ - bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int; - bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int; - bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int; - bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int; - - /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range - * according to the closeness of 2 MV. */ - /* block 8X16 */ - sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[0]); - - sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[1]); - - rd_check_segment(cpi, x, &bsi, PARTITIONING_8X16, - seg_mvs[PARTITIONING_8X16], txfm_cache); - - /* block 16X8 */ - sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[0]); - - sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, - (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); - cal_step_param(sr, &bsi.sv_istep[1]); - - rd_check_segment(cpi, x, &bsi, PARTITIONING_16X8, - seg_mvs[PARTITIONING_16X8], txfm_cache); - - /* If 8x8 is better than 16x8/8x16, then do 4x4 search */ - /* Not skip 4x4 if speed=0 (good quality) */ - if (cpi->sf.no_skip_block4x4_search || - bsi.segment_num == PARTITIONING_8X8) { - /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */ - bsi.mvp.as_int = bsi.sv_mvp[0].as_int; - rd_check_segment(cpi, x, &bsi, PARTITIONING_4X4, - seg_mvs[PARTITIONING_4X4], txfm_cache); - } - - /* restore UMV window */ - x->mv_col_min = tmp_col_min; - x->mv_col_max = tmp_col_max; - x->mv_row_min = tmp_row_min; - x->mv_row_max = tmp_row_max; - } - } + rd_check_segment(cpi, x, &bsi, seg_mvs); /* set it to the best */ - for (i = 0; i < 16; i++) { + for (i = 0; i < 4; i++) { x->e_mbd.mode_info_context->bmi[i].as_mv[0].as_int = bsi.mvs[i].as_int; if (mbmi->second_ref_frame > 0) x->e_mbd.mode_info_context->bmi[i].as_mv[1].as_int = - bsi.second_mvs[i].as_int; + bsi.second_mvs[i].as_int; x->e_mbd.plane[0].eobs[i] = bsi.eobs[i]; } /* save partitions */ - mbmi->txfm_size = bsi.txfm_size; - mbmi->partitioning = bsi.segment_num; - x->partition_info->count = vp9_mbsplit_count[bsi.segment_num]; + x->partition_info->count = 4; for (i = 0; i < x->partition_info->count; i++) { - int j; - - j = vp9_mbsplit_offset[bsi.segment_num][i]; - - x->partition_info->bmi[i].mode = bsi.modes[j]; - x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv; + x->partition_info->bmi[i].mode = bsi.modes[i]; + x->partition_info->bmi[i].mv.as_mv = bsi.mvs[i].as_mv; if (mbmi->second_ref_frame > 0) - x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[j].as_mv; + x->partition_info->bmi[i].second_mv.as_mv = bsi.second_mvs[i].as_mv; } /* * used to set mbmi->mv.as_int */ - x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int; + x->partition_info->bmi[3].mv.as_int = bsi.mvs[3].as_int; if (mbmi->second_ref_frame > 0) - x->partition_info->bmi[15].second_mv.as_int = bsi.second_mvs[15].as_int; + x->partition_info->bmi[3].second_mv.as_int = bsi.second_mvs[3].as_int; *returntotrate = bsi.r; *returndistortion = bsi.d; *returnyrate = bsi.segment_yrate; - *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_MB16X16); + *skippable = vp9_sby_is_skippable(&x->e_mbd, BLOCK_SIZE_SB8X8); return (int)(bsi.segment_rd); } @@ -2474,22 +1603,10 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, x->mv_best_ref_index[ref_frame] = best_index; } -static void set_i8x8_block_modes(MACROBLOCK *x, int modes[4]) { - int i; - MACROBLOCKD *xd = &x->e_mbd; - for (i = 0; i < 4; i++) { - int ib = vp9_i8x8_block[i]; - xd->mode_info_context->bmi[ib + 0].as_mode.first = modes[i]; - xd->mode_info_context->bmi[ib + 1].as_mode.first = modes[i]; - xd->mode_info_context->bmi[ib + 4].as_mode.first = modes[i]; - xd->mode_info_context->bmi[ib + 5].as_mode.first = modes[i]; - // printf("%d,%d,%d,%d\n", - // modes[0], modes[1], modes[2], modes[3]); - } -} - extern void vp9_calc_ref_probs(int *count, vp9_prob *probs); -static void estimate_curframe_refprobs(VP9_COMP *cpi, vp9_prob mod_refprobs[3], int pred_ref) { +static void estimate_curframe_refprobs(VP9_COMP *cpi, + vp9_prob mod_refprobs[3], + int pred_ref) { int norm_cnt[MAX_REF_FRAMES]; const int *const rfct = cpi->count_mb_ref_frame_usage; int intra_count = rfct[INTRA_FRAME]; @@ -2539,7 +1656,8 @@ static INLINE unsigned weighted_cost(vp9_prob *tab0, vp9_prob *tab1, return (0x8000 + weight * cost1 + (0x10000 - weight) * cost0) >> 16; } -static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int *ref_costs) { +static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, + unsigned int *ref_costs) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &cpi->mb.e_mbd; vp9_prob *mod_refprobs; @@ -2588,10 +1706,10 @@ static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, unsigned int // Get the prediction for the current mb cost = weighted_cost(&pred_prob, &new_pred_prob, 0, pred_flag, cpi->seg0_progress); - if (cost > 1024) cost = 768; // i.e. account for 4 bits max. + if (cost > 1024) cost = 768; // i.e. account for 4 bits max. // for incorrectly predicted cases - if (! pred_flag) { + if (!pred_flag) { vp9_prob curframe_mod_refprobs[3]; if (cpi->seg0_progress) { @@ -2699,6 +1817,51 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, frame_type, block_size); } + +static enum BlockSize get_block_size(int bw, int bh) { + if (bw == 4 && bh == 4) + return BLOCK_4X4; + + if (bw == 4 && bh == 8) + return BLOCK_4X8; + + if (bw == 8 && bh == 4) + return BLOCK_8X4; + + if (bw == 8 && bh == 8) + return BLOCK_8X8; + + if (bw == 8 && bh == 16) + return BLOCK_8X16; + + if (bw == 16 && bh == 8) + return BLOCK_16X8; + + if (bw == 16 && bh == 16) + return BLOCK_16X16; + + if (bw == 32 && bh == 32) + return BLOCK_32X32; + + if (bw == 32 && bh == 16) + return BLOCK_32X16; + + if (bw == 16 && bh == 32) + return BLOCK_16X32; + + if (bw == 64 && bh == 32) + return BLOCK_64X32; + + if (bw == 32 && bh == 64) + return BLOCK_32X64; + + if (bw == 64 && bh == 64) + return BLOCK_64X64; + + assert(0); + return -1; +} + static void model_rd_from_var_lapndz(int var, int n, int qstep, int *rate, int *dist) { // This function models the rate and distortion for a Laplacian @@ -2742,6 +1905,36 @@ static void model_rd_from_var_lapndz(int var, int n, int qstep, vp9_clear_system_state(); } +static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE_TYPE bsize, + MACROBLOCK *x, MACROBLOCKD *xd, + int *out_rate_sum, int *out_dist_sum) { + // Note our transform coeffs are 8 times an orthogonal transform. + // Hence quantizer step is also 8 times. To get effective quantizer + // we need to divide by 8 before sending to modeling function. + unsigned int sse, var; + int i, rate_sum = 0, dist_sum = 0; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblock_plane *const p = &x->plane[i]; + struct macroblockd_plane *const pd = &xd->plane[i]; + + const int bwl = b_width_log2(bsize) - pd->subsampling_x; + const int bhl = b_height_log2(bsize) - pd->subsampling_y; + const enum BlockSize bs = get_block_size(4 << bwl, 4 << bhl); + int rate, dist; + var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, &sse); + model_rd_from_var_lapndz(var, 16 << (bwl + bhl), + pd->dequant[1] >> 3, &rate, &dist); + + rate_sum += rate; + dist_sum += dist; + } + + *out_rate_sum = rate_sum; + *out_dist_sum = dist_sum; +} + static enum BlockSize y_to_uv_block_size(enum BlockSize bs) { switch (bs) { case BLOCK_64X64: return BLOCK_32X32; @@ -2751,6 +1944,9 @@ static enum BlockSize y_to_uv_block_size(enum BlockSize bs) { case BLOCK_32X16: return BLOCK_16X8; case BLOCK_16X32: return BLOCK_8X16; case BLOCK_16X16: return BLOCK_8X8; + case BLOCK_16X8: return BLOCK_8X4; + case BLOCK_8X16: return BLOCK_4X8; + case BLOCK_8X8: return BLOCK_4X4; default: assert(0); return -1; @@ -2766,6 +1962,9 @@ static enum BlockSize y_bsizet_to_block_size(BLOCK_SIZE_TYPE bs) { case BLOCK_SIZE_SB32X16: return BLOCK_32X16; case BLOCK_SIZE_SB16X32: return BLOCK_16X32; case BLOCK_SIZE_MB16X16: return BLOCK_16X16; + case BLOCK_SIZE_SB16X8: return BLOCK_16X8; + case BLOCK_SIZE_SB8X16: return BLOCK_8X16; + case BLOCK_SIZE_SB8X8: return BLOCK_8X8; default: assert(0); return -1; @@ -2966,76 +2165,41 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used if (1) { - int switchable_filter_index, newbest; - int tmp_rate_y_i = 0, tmp_rate_u_i = 0, tmp_rate_v_i = 0; - int tmp_dist_y_i = 0, tmp_dist_u_i = 0, tmp_dist_v_i = 0; - for (switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; - ++switchable_filter_index) { + int i, newbest; + int tmp_rate_sum = 0, tmp_dist_sum = 0; + for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { int rs = 0; - mbmi->interp_filter = vp9_switchable_interp[switchable_filter_index]; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + const INTERPOLATIONFILTERTYPE filter = vp9_switchable_interp[i]; + const int is_intpel_interp = intpel_mv && + vp9_is_interpolating_filter[filter]; + mbmi->interp_filter = filter; + vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - if (cpi->common.mcomp_filter_type == SWITCHABLE) { + if (cm->mcomp_filter_type == SWITCHABLE) { const int c = vp9_get_pred_context(cm, xd, PRED_SWITCHABLE_INTERP); const int m = vp9_switchable_interp_map[mbmi->interp_filter]; rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs[c][m]; } - if (interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y_i + tmp_rate_u_i + tmp_rate_v_i, - tmp_dist_y_i + tmp_dist_u_i + tmp_dist_v_i); + + if (interpolating_intpel_seen && is_intpel_interp) { + rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_sum, tmp_dist_sum); } else { - unsigned int sse, var; - int tmp_rate_y, tmp_rate_u, tmp_rate_v; - int tmp_dist_y, tmp_dist_u, tmp_dist_v; + int rate_sum = 0, dist_sum = 0; vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - var = cpi->fn_ptr[block_size].vf(x->plane[0].src.buf, - x->plane[0].src.stride, - xd->plane[0].dst.buf, - xd->plane[0].dst.stride, - &sse); - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. - model_rd_from_var_lapndz(var, MI_SIZE * bw * MI_SIZE * bh, - xd->plane[0].dequant[1] >> 3, - &tmp_rate_y, &tmp_dist_y); - var = cpi->fn_ptr[uv_block_size].vf(x->plane[1].src.buf, - x->plane[1].src.stride, - xd->plane[1].dst.buf, - xd->plane[1].dst.stride, - &sse); - model_rd_from_var_lapndz(var, MI_UV_SIZE * bw * MI_UV_SIZE * bh, - xd->plane[1].dequant[1] >> 3, - &tmp_rate_u, &tmp_dist_u); - var = cpi->fn_ptr[uv_block_size].vf(x->plane[2].src.buf, - x->plane[1].src.stride, - xd->plane[2].dst.buf, - xd->plane[1].dst.stride, - &sse); - model_rd_from_var_lapndz(var, MI_UV_SIZE * bw * MI_UV_SIZE * bh, - xd->plane[2].dequant[1] >> 3, - &tmp_rate_v, &tmp_dist_v); - rd = RDCOST(x->rdmult, x->rddiv, - rs + tmp_rate_y + tmp_rate_u + tmp_rate_v, - tmp_dist_y + tmp_dist_u + tmp_dist_v); - if (!interpolating_intpel_seen && intpel_mv && - vp9_is_interpolating_filter[mbmi->interp_filter]) { - tmp_rate_y_i = tmp_rate_y; - tmp_rate_u_i = tmp_rate_u; - tmp_rate_v_i = tmp_rate_v; - tmp_dist_y_i = tmp_dist_y; - tmp_dist_u_i = tmp_dist_u; - tmp_dist_v_i = tmp_dist_v; + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); + rd = RDCOST(x->rdmult, x->rddiv, rs + rate_sum, dist_sum); + if (!interpolating_intpel_seen && is_intpel_interp) { + tmp_rate_sum = rate_sum; + tmp_dist_sum = dist_sum; } } - newbest = (switchable_filter_index == 0 || rd < best_rd); + newbest = i == 0 || rd < best_rd; + if (newbest) { best_rd = rd; *best_filter = mbmi->interp_filter; } + if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || (cm->mcomp_filter_type != SWITCHABLE && cm->mcomp_filter_type == mbmi->interp_filter)) { @@ -3050,21 +2214,19 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, sizeof(unsigned char) * MI_UV_SIZE * bw); for (i = 0; i < MI_UV_SIZE * bh; ++i) vpx_memcpy(tmp_vbuf + i * MI_UV_SIZE * bw, - xd->plane[2].dst.buf + i * xd->plane[1].dst.stride, + xd->plane[2].dst.buf + i * xd->plane[2].dst.stride, sizeof(unsigned char) * MI_UV_SIZE * bw); pred_exists = 1; } - interpolating_intpel_seen |= - intpel_mv && vp9_is_interpolating_filter[mbmi->interp_filter]; + interpolating_intpel_seen |= is_intpel_interp; } } // Set the appripriate filter - if (cm->mcomp_filter_type != SWITCHABLE) - mbmi->interp_filter = cm->mcomp_filter_type; - else - mbmi->interp_filter = *best_filter; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ? + cm->mcomp_filter_type : *best_filter; + vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + if (pred_exists) { // FIXME(rbultje): mb code still predicts into xd->predictor @@ -3077,7 +2239,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, tmp_ubuf + i * bw * MI_UV_SIZE, sizeof(unsigned char) * bw * MI_UV_SIZE); for (i = 0; i < bh * MI_UV_SIZE; ++i) - vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[1].dst.stride, + vpx_memcpy(xd->plane[2].dst.buf + i * xd->plane[2].dst.stride, tmp_vbuf + i * bw * MI_UV_SIZE, sizeof(unsigned char) * bw * MI_UV_SIZE); } else { @@ -3193,867 +2355,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, return this_rd; // if 0, this will be re-calculated by caller } -static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, - int mi_row, int mi_col, - int *returnrate, int *returndistortion, - int64_t *returnintra) { - static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, - VP9_ALT_FLAG }; - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - union b_mode_info best_bmodes[16]; - MB_MODE_INFO best_mbmode; - PARTITION_INFO best_partition; - int_mv best_ref_mv, second_best_ref_mv; - MB_PREDICTION_MODE this_mode; - MB_PREDICTION_MODE best_mode = DC_PRED; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - int i, best_mode_index = 0; - int mode8x8[4]; - unsigned char segment_id = mbmi->segment_id; - - int mode_index; - int mdcounts[4]; - int rate, distortion; - int rate2, distortion2; - int64_t best_txfm_rd[NB_TXFM_MODES]; - int64_t best_txfm_diff[NB_TXFM_MODES]; - int64_t best_pred_diff[NB_PREDICTION_TYPES]; - int64_t best_pred_rd[NB_PREDICTION_TYPES]; - int64_t best_rd = INT64_MAX, best_intra_rd = INT64_MAX; -#if CONFIG_COMP_INTERINTRA_PRED - int is_best_interintra = 0; - int64_t best_intra16_rd = INT64_MAX; - int best_intra16_mode = DC_PRED; -#if SEPARATE_INTERINTRA_UV - int best_intra16_uv_mode = DC_PRED; -#endif -#endif - int64_t best_overall_rd = INT64_MAX; - INTERPOLATIONFILTERTYPE best_filter = SWITCHABLE; - INTERPOLATIONFILTERTYPE tmp_best_filter = SWITCHABLE; - int uv_intra_rate[2], uv_intra_distortion[2], uv_intra_rate_tokenonly[2]; - int uv_intra_skippable[2]; - MB_PREDICTION_MODE uv_intra_mode[2]; - int rate_y, UNINITIALIZED_IS_SAFE(rate_uv); - int distortion_uv = INT_MAX; - int64_t best_yrd = INT64_MAX; - - int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; - int frame_mdcounts[4][4]; - YV12_BUFFER_CONFIG yv12_mb[4]; - - unsigned int ref_costs[MAX_REF_FRAMES]; - int_mv seg_mvs[NB_PARTITIONINGS][16 /* n_blocks */][MAX_REF_FRAMES - 1]; - - int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex, - cpi->common.y_dc_delta_q); - int64_t mode_distortions[MB_MODE_COUNT] = {-1}; - int64_t frame_distortions[MAX_REF_FRAMES] = {-1}; - int ref_frame; - - struct scale_factors scale_factor[4]; - - vpx_memset(mode8x8, 0, sizeof(mode8x8)); - vpx_memset(&frame_mv, 0, sizeof(frame_mv)); - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - vpx_memset(&best_bmodes, 0, sizeof(best_bmodes)); - vpx_memset(&x->mb_context[xd->sb_index][xd->mb_index], 0, - sizeof(PICK_MODE_CONTEXT)); - - x->mb_context[xd->sb_index][xd->mb_index].frames_with_high_error = 0; - x->mb_context[xd->sb_index][xd->mb_index].modes_with_high_error = 0; - - for (i = 0; i < MAX_REF_FRAMES; i++) - frame_mv[NEWMV][i].as_int = INVALID_MV; - for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = INT64_MAX; - for (i = 0; i < NB_TXFM_MODES; i++) - best_txfm_rd[i] = INT64_MAX; - - for (i = 0; i < NB_PARTITIONINGS; i++) { - int j, k; - - for (j = 0; j < 16; j++) - for (k = 0; k < MAX_REF_FRAMES - 1; k++) - seg_mvs[i][j][k].as_int = INVALID_MV; - } - - if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - setup_buffer_inter(cpi, x, cpi->lst_fb_idx, - LAST_FRAME, BLOCK_16X16, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb, scale_factor); - } - - if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - setup_buffer_inter(cpi, x, cpi->gld_fb_idx, - GOLDEN_FRAME, BLOCK_16X16, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb, scale_factor); - } - - if (cpi->ref_frame_flags & VP9_ALT_FLAG) { - setup_buffer_inter(cpi, x, cpi->alt_fb_idx, - ALTREF_FRAME, BLOCK_16X16, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], - frame_mdcounts, yv12_mb, scale_factor); - } - - *returnintra = INT64_MAX; - - mbmi->ref_frame = INTRA_FRAME; - - /* Initialize zbin mode boost for uv costing */ - cpi->zbin_mode_boost = 0; - vp9_update_zbin_extra(cpi, x); - - xd->mode_info_context->mbmi.mode = DC_PRED; - - for (i = 0; i <= TX_8X8; i++) { - mbmi->txfm_size = i; - rd_pick_intra_sbuv_mode(cpi, x, &uv_intra_rate[i], - &uv_intra_rate_tokenonly[i], - &uv_intra_distortion[i], - &uv_intra_skippable[i], - BLOCK_SIZE_MB16X16); - uv_intra_mode[i] = mbmi->uv_mode; - } - - // Get estimates of reference frame costs for each reference frame - // that depend on the current prediction etc. - estimate_ref_frame_costs(cpi, segment_id, ref_costs); - - for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { - int64_t this_rd = INT64_MAX; - int disable_skip = 0, skippable = 0; - int other_cost = 0; - int compmode_cost = 0; -#if CONFIG_COMP_INTERINTRA_PRED - int compmode_interintra_cost = 0; -#endif - int mode_excluded = 0; - int64_t txfm_cache[NB_TXFM_MODES] = { 0 }; - YV12_BUFFER_CONFIG *scaled_ref_frame; - - // These variables hold are rolling total cost and distortion for this mode - rate2 = 0; - distortion2 = 0; - rate_y = 0; - rate_uv = 0; - - x->skip = 0; - - this_mode = vp9_mode_order[mode_index].mode; - mbmi->mode = this_mode; - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame = vp9_mode_order[mode_index].ref_frame; - mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; - - mbmi->interp_filter = cm->mcomp_filter_type; - - set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, - scale_factor); - - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - - // Test best rd so far against threshold for trying this mode. - if (best_rd <= cpi->rd_threshes[mode_index]) - continue; - - // Ensure that the references used by this mode are available. - if (mbmi->ref_frame && - !(cpi->ref_frame_flags & flag_list[mbmi->ref_frame])) - continue; - - if (mbmi->second_ref_frame > 0 && - !(cpi->ref_frame_flags & flag_list[mbmi->second_ref_frame])) - continue; - - // only scale on zeromv. - if (mbmi->ref_frame > 0 && - (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 || - yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) && - this_mode != ZEROMV) - continue; - - if (mbmi->second_ref_frame > 0 && - (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 || - yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) && - this_mode != ZEROMV) - continue; - - // current coding mode under rate-distortion optimization test loop -#if CONFIG_COMP_INTERINTRA_PRED - mbmi->interintra_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); - mbmi->interintra_uv_mode = (MB_PREDICTION_MODE)(DC_PRED - 1); -#endif - - // If the segment reference frame feature is enabled.... - // then do nothing if the current ref frame is not allowed.. - if (vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && - !vp9_check_segref(xd, segment_id, mbmi->ref_frame)) { - continue; - // If the segment skip feature is enabled.... - // then do nothing if the current mode is not allowed.. - } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP) && - (this_mode != ZEROMV)) { - continue; - // Disable this drop out case if the ref frame segment - // level feature is enabled for this segment. This is to - // prevent the possibility that the we end up unable to pick any mode. - } else if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME)) { - // Only consider ZEROMV/ALTREF_FRAME for alt ref frame overlay, - // unless ARNR filtering is enabled in which case we want - // an unfiltered alternative - if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { - if (this_mode != ZEROMV || - mbmi->ref_frame != ALTREF_FRAME) { - continue; - } - } - } - - /* everything but intra */ - scaled_ref_frame = NULL; - if (mbmi->ref_frame) { - int ref = mbmi->ref_frame; - int fb; - - best_ref_mv = mbmi->ref_mvs[ref][0]; - vpx_memcpy(mdcounts, frame_mdcounts[ref], sizeof(mdcounts)); - - if (mbmi->ref_frame == LAST_FRAME) { - fb = cpi->lst_fb_idx; - } else if (mbmi->ref_frame == GOLDEN_FRAME) { - fb = cpi->gld_fb_idx; - } else { - fb = cpi->alt_fb_idx; - } - - if (cpi->scaled_ref_idx[fb] != cm->ref_frame_map[fb]) - scaled_ref_frame = &cm->yv12_fb[cpi->scaled_ref_idx[fb]]; - } - - if (mbmi->second_ref_frame > 0) { - int ref = mbmi->second_ref_frame; - - second_best_ref_mv = mbmi->ref_mvs[ref][0]; - } - - // TODO(jkoleszar) scaling/translation handled during creation of yv12_mb - // currently. - setup_pre_planes(xd, &yv12_mb[mbmi->ref_frame], - mbmi->second_ref_frame > 0 ? &yv12_mb[mbmi->second_ref_frame] : NULL, - 0, 0, NULL, NULL); - - // Experimental code. Special case for gf and arf zeromv modes. - // Increase zbin size to suppress noise - if (cpi->zbin_mode_boost_enabled) { - if (vp9_mode_order[mode_index].ref_frame == INTRA_FRAME) - cpi->zbin_mode_boost = 0; - else { - if (vp9_mode_order[mode_index].mode == ZEROMV) { - if (vp9_mode_order[mode_index].ref_frame != LAST_FRAME) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else - cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; - } else if (vp9_mode_order[mode_index].mode == SPLITMV) - cpi->zbin_mode_boost = 0; - else - cpi->zbin_mode_boost = MV_ZBIN_BOOST; - } - - vp9_update_zbin_extra(cpi, x); - } - - // Intra - if (!mbmi->ref_frame) { - switch (this_mode) { - default: - case V_PRED: - case H_PRED: - case D45_PRED: - case D135_PRED: - case D117_PRED: - case D153_PRED: - case D27_PRED: - case D63_PRED: - rate2 += intra_cost_penalty; - case DC_PRED: - case TM_PRED: - mbmi->ref_frame = INTRA_FRAME; - // FIXME compound intra prediction - vp9_build_intra_predictors_sby_s(&x->e_mbd, BLOCK_SIZE_MB16X16); - // vp9_build_intra_predictors_mby(&x->e_mbd); - super_block_yrd(cpi, x, &rate_y, &distortion, &skippable, - BLOCK_SIZE_MB16X16, txfm_cache); - rate2 += rate_y; - distortion2 += distortion; - rate2 += x->mbmode_cost[xd->frame_type][mbmi->mode]; - - rate2 += uv_intra_rate[mbmi->txfm_size != TX_4X4]; - rate_uv = uv_intra_rate_tokenonly[mbmi->txfm_size != TX_4X4]; - distortion2 += uv_intra_distortion[mbmi->txfm_size != TX_4X4]; - distortion_uv = uv_intra_distortion[mbmi->txfm_size != TX_4X4]; - skippable = skippable && - uv_intra_skippable[mbmi->txfm_size != TX_4X4]; - break; - case I4X4_PRED: { - int64_t tmp_rd; - - // Note the rate value returned here includes the cost of coding - // the I4X4_PRED mode : x->mbmode_cost[xd->frame_type][I4X4_PRED]; - mbmi->txfm_size = TX_4X4; - tmp_rd = rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, - &distortion, best_yrd); - rate2 += rate; - rate2 += intra_cost_penalty; - distortion2 += distortion; - - if (tmp_rd < best_yrd) { - rate2 += uv_intra_rate[TX_4X4]; - rate_uv = uv_intra_rate_tokenonly[TX_4X4]; - distortion2 += uv_intra_distortion[TX_4X4]; - distortion_uv = uv_intra_distortion[TX_4X4]; - } else { - this_rd = INT64_MAX; - disable_skip = 1; - } - } - break; - case I8X8_PRED: { - int64_t tmp_rd; - - tmp_rd = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate, &rate_y, - &distortion, mode8x8, - best_yrd, txfm_cache); - rate2 += rate; - rate2 += intra_cost_penalty; - distortion2 += distortion; - - /* TODO: uv rate maybe over-estimated here since there is UV intra - mode coded in I8X8_PRED prediction */ - if (tmp_rd < best_yrd) { - rate2 += uv_intra_rate[TX_4X4]; - rate_uv = uv_intra_rate_tokenonly[TX_4X4]; - distortion2 += uv_intra_distortion[TX_4X4]; - distortion_uv = uv_intra_distortion[TX_4X4]; - } else { - this_rd = INT64_MAX; - disable_skip = 1; - } - } - break; - } - } - // Split MV. The code is very different from the other inter modes so - // special case it. - else if (this_mode == SPLITMV) { - const int is_comp_pred = mbmi->second_ref_frame > 0; - int64_t this_rd_thresh; - int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; - int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; - int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0; - int switchable_filter_index; - int_mv *second_ref = is_comp_pred ? &second_best_ref_mv : NULL; - union b_mode_info tmp_best_bmodes[16]; - MB_MODE_INFO tmp_best_mbmode; - PARTITION_INFO tmp_best_partition; - int pred_exists = 0; - - this_rd_thresh = - (mbmi->ref_frame == LAST_FRAME) ? - cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA]; - this_rd_thresh = - (mbmi->ref_frame == GOLDEN_FRAME) ? - cpi->rd_threshes[THR_NEWG] : this_rd_thresh; - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - - for (switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; - ++switchable_filter_index) { - int newbest; - mbmi->interp_filter = - vp9_switchable_interp[switchable_filter_index]; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - - tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, - second_ref, best_yrd, mdcounts, - &rate, &rate_y, &distortion, - &skippable, - (int)this_rd_thresh, seg_mvs, - txfm_cache); - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs - [vp9_get_pred_context(&cpi->common, xd, - PRED_SWITCHABLE_INTERP)] - [vp9_switchable_interp_map[mbmi->interp_filter]]; - tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0); - } - newbest = (tmp_rd < tmp_best_rd); - if (newbest) { - tmp_best_filter = mbmi->interp_filter; - tmp_best_rd = tmp_rd; - } - if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || - (mbmi->interp_filter == cm->mcomp_filter_type && - cm->mcomp_filter_type != SWITCHABLE)) { - tmp_best_rdu = tmp_rd; - tmp_best_rate = rate; - tmp_best_ratey = rate_y; - tmp_best_distortion = distortion; - tmp_best_skippable = skippable; - vpx_memcpy(&tmp_best_mbmode, mbmi, sizeof(MB_MODE_INFO)); - vpx_memcpy(&tmp_best_partition, x->partition_info, - sizeof(PARTITION_INFO)); - for (i = 0; i < 16; i++) { - tmp_best_bmodes[i] = xd->mode_info_context->bmi[i]; - } - pred_exists = 1; - } - } // switchable_filter_index loop - - mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? - tmp_best_filter : cm->mcomp_filter_type); - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - if (!pred_exists) { - // Handles the special case when a filter that is not in the - // switchable list (bilinear, 6-tap) is indicated at the frame level - tmp_rd = rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, - second_ref, best_yrd, mdcounts, - &rate, &rate_y, &distortion, - &skippable, - (int)this_rd_thresh, seg_mvs, - txfm_cache); - } else { - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs - [vp9_get_pred_context(&cpi->common, xd, - PRED_SWITCHABLE_INTERP)] - [vp9_switchable_interp_map[mbmi->interp_filter]]; - tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); - } - tmp_rd = tmp_best_rdu; - rate = tmp_best_rate; - rate_y = tmp_best_ratey; - distortion = tmp_best_distortion; - skippable = tmp_best_skippable; - vpx_memcpy(mbmi, &tmp_best_mbmode, sizeof(MB_MODE_INFO)); - vpx_memcpy(x->partition_info, &tmp_best_partition, - sizeof(PARTITION_INFO)); - for (i = 0; i < 16; i++) { - xd->mode_info_context->bmi[i] = tmp_best_bmodes[i]; - } - } - - rate2 += rate; - distortion2 += distortion; - - if (cpi->common.mcomp_filter_type == SWITCHABLE) - rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs - [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] - [vp9_switchable_interp_map[mbmi->interp_filter]]; - - // If even the 'Y' rd value of split is higher than best so far - // then dont bother looking at UV - if (tmp_rd < best_yrd) { - int uv_skippable; - - vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, - BLOCK_SIZE_MB16X16); - - vp9_subtract_sbuv(x, BLOCK_SIZE_MB16X16); - - super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv, - &uv_skippable, BLOCK_SIZE_MB16X16); - rate2 += rate_uv; - distortion2 += distortion_uv; - skippable = skippable && uv_skippable; - } else { - this_rd = INT64_MAX; - disable_skip = 1; - } - - if (!mode_excluded) { - if (is_comp_pred) - mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; - else - mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; - } - - compmode_cost = - vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred); - mbmi->mode = this_mode; - } - else { -#if CONFIG_COMP_INTERINTRA_PRED - if (mbmi->second_ref_frame == INTRA_FRAME) { - if (best_intra16_mode == DC_PRED - 1) continue; - mbmi->interintra_mode = best_intra16_mode; -#if SEPARATE_INTERINTRA_UV - mbmi->interintra_uv_mode = best_intra16_uv_mode; -#else - mbmi->interintra_uv_mode = best_intra16_mode; -#endif - } -#endif - this_rd = handle_inter_mode(cpi, x, BLOCK_SIZE_MB16X16, - mdcounts, txfm_cache, - &rate2, &distortion2, &skippable, - &compmode_cost, -#if CONFIG_COMP_INTERINTRA_PRED - &compmode_interintra_cost, -#endif - &rate_y, &distortion, - &rate_uv, &distortion_uv, - &mode_excluded, &disable_skip, - mode_index, &tmp_best_filter, frame_mv, - scaled_ref_frame, mi_row, mi_col); - if (this_rd == INT64_MAX) - continue; - } - -#if CONFIG_COMP_INTERINTRA_PRED - if (cpi->common.use_interintra) - rate2 += compmode_interintra_cost; -#endif - - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) - rate2 += compmode_cost; - - // Estimate the reference frame signaling cost and add it - // to the rolling cost variable. - rate2 += ref_costs[mbmi->ref_frame]; - - if (!disable_skip) { - // Test for the condition where skip block will be activated - // because there are no non zero coefficients and make any - // necessary adjustment for rate. Ignore if skip is coded at - // segment level as the cost wont have been added in. - int mb_skip_allowed; - - // Is Mb level skip allowed (i.e. not coded at segment level). - mb_skip_allowed = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); - - if (skippable) { - mbmi->mb_skip_coeff = 1; - - // Back out the coefficient coding costs - rate2 -= (rate_y + rate_uv); - // for best_yrd calculation - rate_uv = 0; - - if (mb_skip_allowed) { - int prob_skip_cost; - - // Cost the skip mb case - vp9_prob skip_prob = - vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP); - - if (skip_prob) { - prob_skip_cost = vp9_cost_bit(skip_prob, 1); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - } else { - // Add in the cost of the no skip flag. - mbmi->mb_skip_coeff = 0; - if (mb_skip_allowed) { - int prob_skip_cost = vp9_cost_bit( - vp9_get_pred_prob(cm, &x->e_mbd, PRED_MBSKIP), 0); - rate2 += prob_skip_cost; - other_cost += prob_skip_cost; - } - } - - // Calculate the final RD estimate for this mode. - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - } - - // Keep record of best intra distortion - if ((mbmi->ref_frame == INTRA_FRAME) && - (this_rd < best_intra_rd)) { - best_intra_rd = this_rd; - *returnintra = distortion2; - } -#if CONFIG_COMP_INTERINTRA_PRED - if ((mbmi->ref_frame == INTRA_FRAME) && - (this_mode <= TM_PRED) && - (this_rd < best_intra16_rd)) { - best_intra16_rd = this_rd; - best_intra16_mode = this_mode; -#if SEPARATE_INTERINTRA_UV - best_intra16_uv_mode = uv_intra_mode[mbmi->txfm_size != TX_4X4]; -#endif - } -#endif - - if (!disable_skip && mbmi->ref_frame == INTRA_FRAME) - for (i = 0; i < NB_PREDICTION_TYPES; ++i) - best_pred_rd[i] = MIN(best_pred_rd[i], this_rd); - - if (this_rd < best_overall_rd) { - best_overall_rd = this_rd; - best_filter = tmp_best_filter; - best_mode = this_mode; -#if CONFIG_COMP_INTERINTRA_PRED - is_best_interintra = (mbmi->second_ref_frame == INTRA_FRAME); -#endif - } - - // Store the respective mode distortions for later use. - // Store the respective mode distortions for later use. - if (mode_distortions[this_mode] == -1 - || distortion2 < mode_distortions[this_mode]) { - mode_distortions[this_mode] = distortion2; - } - if (frame_distortions[mbmi->ref_frame] == -1 || - distortion2 < frame_distortions[mbmi->ref_frame]) { - frame_distortions[mbmi->ref_frame] = distortion2; - } - - // Did this mode help.. i.e. is it the new best mode - if (this_rd < best_rd || x->skip) { - if (!mode_excluded) { - /* - if (mbmi->second_ref_frame == INTRA_FRAME) { - printf("rd %d best %d bestintra16 %d\n", this_rd, best_rd, best_intra16_rd); - } - */ - // Note index of best mode so far - best_mode_index = mode_index; - - if (this_mode <= I4X4_PRED) { - if (mbmi->txfm_size != TX_4X4 - && this_mode != I4X4_PRED - && this_mode != I8X8_PRED) - mbmi->uv_mode = uv_intra_mode[TX_8X8]; - else - mbmi->uv_mode = uv_intra_mode[TX_4X4]; - /* required for left and above block mv */ - mbmi->mv[0].as_int = 0; - } - - other_cost += ref_costs[mbmi->ref_frame]; - - /* Calculate the final y RD estimate for this mode */ - best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2 - rate_uv - other_cost), - (distortion2 - distortion_uv)); - - *returnrate = rate2; - *returndistortion = distortion2; - best_rd = this_rd; - vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO)); - vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO)); - - if ((this_mode == I4X4_PRED) - || (this_mode == I8X8_PRED) - || (this_mode == SPLITMV)) - for (i = 0; i < 16; i++) { - best_bmodes[i] = xd->mode_info_context->bmi[i]; - } - } - - // Testing this mode gave rise to an improvement in best error score. - // Lower threshold a bit for next time - cpi->rd_thresh_mult[mode_index] = - (cpi->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? - cpi->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; - cpi->rd_threshes[mode_index] = - (cpi->rd_baseline_thresh[mode_index] >> 7) * - cpi->rd_thresh_mult[mode_index]; - } else { - // If the mode did not help improve the best error case then raise the - // threshold for testing that mode next time around. - cpi->rd_thresh_mult[mode_index] += 4; - - if (cpi->rd_thresh_mult[mode_index] > MAX_THRESHMULT) - cpi->rd_thresh_mult[mode_index] = MAX_THRESHMULT; - - cpi->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) - * cpi->rd_thresh_mult[mode_index]; - } - - /* keep record of best compound/single-only prediction */ - if (!disable_skip && mbmi->ref_frame != INTRA_FRAME) { - int64_t single_rd, hybrid_rd; - int single_rate, hybrid_rate; - - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - single_rate = rate2 - compmode_cost; - hybrid_rate = rate2; - } else { - single_rate = rate2; - hybrid_rate = rate2 + compmode_cost; - } - - single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); - hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - - if (mbmi->second_ref_frame <= INTRA_FRAME && - single_rd < best_pred_rd[SINGLE_PREDICTION_ONLY]) { - best_pred_rd[SINGLE_PREDICTION_ONLY] = single_rd; - } else if (mbmi->second_ref_frame > INTRA_FRAME && - single_rd < best_pred_rd[COMP_PREDICTION_ONLY]) { - best_pred_rd[COMP_PREDICTION_ONLY] = single_rd; - } - if (hybrid_rd < best_pred_rd[HYBRID_PREDICTION]) - best_pred_rd[HYBRID_PREDICTION] = hybrid_rd; - } - - /* keep record of best txfm size */ - if (!mode_excluded && this_rd != INT64_MAX) { - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t adj_rd; - if (this_mode != I4X4_PRED) { - const int64_t txfm_mode_diff = - txfm_cache[i] - txfm_cache[cm->txfm_mode]; - adj_rd = this_rd + txfm_mode_diff; - } else { - adj_rd = this_rd; - } - if (adj_rd < best_txfm_rd[i]) - best_txfm_rd[i] = adj_rd; - } - } - - if (x->skip && !mode_excluded) - break; - } - - assert((cm->mcomp_filter_type == SWITCHABLE) || - (cm->mcomp_filter_type == best_mbmode.interp_filter) || - (best_mbmode.mode <= I4X4_PRED)); - -#if CONFIG_COMP_INTERINTRA_PRED - ++cpi->interintra_select_count[is_best_interintra]; -#endif - - // Accumulate filter usage stats - // TODO(agrange): Use RD criteria to select interpolation filter mode. - if (is_inter_mode(best_mode)) - ++cpi->best_switchable_interp_count[vp9_switchable_interp_map[best_filter]]; - - // Reduce the activation RD thresholds for the best choice mode - if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && - (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { - int best_adjustment = (cpi->rd_thresh_mult[best_mode_index] >> 2); - - cpi->rd_thresh_mult[best_mode_index] = - (cpi->rd_thresh_mult[best_mode_index] >= - (MIN_THRESHMULT + best_adjustment)) ? - cpi->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; - cpi->rd_threshes[best_mode_index] = - (cpi->rd_baseline_thresh[best_mode_index] >> 7) * - cpi->rd_thresh_mult[best_mode_index]; - } - - // This code forces Altref,0,0 and skip for the frame that overlays a - // an alrtef unless Altref is filtered. However, this is unsafe if - // segment level coding of ref frame is enabled for this - // segment. - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME) && - cpi->is_src_frame_alt_ref && - (cpi->oxcf.arnr_max_frames == 0) && - (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { - mbmi->mode = ZEROMV; - if (cm->txfm_mode <= ALLOW_8X8) - mbmi->txfm_size = cm->txfm_mode; - else - mbmi->txfm_size = TX_16X16; - mbmi->ref_frame = ALTREF_FRAME; - mbmi->mv[0].as_int = 0; - mbmi->uv_mode = DC_PRED; - mbmi->mb_skip_coeff = 1; - mbmi->partitioning = 0; - set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, - scale_factor); - - vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff)); - vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); - goto end; - } - - // macroblock modes - vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); - if (best_mbmode.mode == I4X4_PRED) { - for (i = 0; i < 16; i++) { - xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode; - } - } - - if (best_mbmode.mode == I8X8_PRED) - set_i8x8_block_modes(x, mode8x8); - - if (best_mbmode.mode == SPLITMV) { - for (i = 0; i < 16; i++) - xd->mode_info_context->bmi[i].as_mv[0].as_int = - best_bmodes[i].as_mv[0].as_int; - if (mbmi->second_ref_frame > 0) - for (i = 0; i < 16; i++) - xd->mode_info_context->bmi[i].as_mv[1].as_int = - best_bmodes[i].as_mv[1].as_int; - - vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); - - mbmi->mv[0].as_int = x->partition_info->bmi[15].mv.as_int; - mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int; - } - - for (i = 0; i < NB_PREDICTION_TYPES; ++i) { - if (best_pred_rd[i] == INT64_MAX) - best_pred_diff[i] = INT_MIN; - else - best_pred_diff[i] = best_rd - best_pred_rd[i]; - } - - if (!x->skip) { - for (i = 0; i < NB_TXFM_MODES; i++) { - if (best_txfm_rd[i] == INT64_MAX) - best_txfm_diff[i] = 0; - else - best_txfm_diff[i] = best_rd - best_txfm_rd[i]; - } - } else { - vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); - } - -end: - - // Flag all modes that have a distortion thats > 2x the best we found at - // this level. - for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) { - if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV - || mode_index == SPLITMV) - continue; - - if (mode_distortions[mode_index] > 2 * *returndistortion) { - x->mb_context[xd->sb_index][xd->mb_index].modes_with_high_error |= (1 - << mode_index); - } - } - - // Flag all ref frames that have a distortion thats > 2x the best we found at - // this level. - for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { - if (frame_distortions[ref_frame] > 2 * *returndistortion) { - x->mb_context[xd->sb_index][xd->mb_index].frames_with_high_error |= (1 - << ref_frame); - } - } - - set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, - scale_factor); - store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index], - best_mode_index, &best_partition, - &mbmi->ref_mvs[mbmi->ref_frame][0], - &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 : - mbmi->second_ref_frame][0], - best_pred_diff, best_txfm_diff); -} - void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *returnrate, int *returndist, BLOCK_SIZE_TYPE bsize, @@ -4065,14 +2366,24 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int dist_y = 0, dist_uv; int y_skip = 0, uv_skip; int64_t txfm_cache[NB_TXFM_MODES], err; + MB_PREDICTION_MODE mode; + TX_SIZE txfm_size; + int rate4x4_y, rate4x4_y_tokenonly, dist4x4_y; + int64_t err4x4 = INT64_MAX; int i; ctx->skip = 0; xd->mode_info_context->mbmi.mode = DC_PRED; err = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, bsize, txfm_cache); + mode = xd->mode_info_context->mbmi.mode; + txfm_size = xd->mode_info_context->mbmi.txfm_size; rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize); + if (bsize == BLOCK_SIZE_SB8X8) + err4x4 = rd_pick_intra4x4mby_modes(cpi, x, &rate4x4_y, + &rate4x4_y_tokenonly, + &dist4x4_y, err); if (y_skip && uv_skip) { *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + @@ -4080,145 +2391,30 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returndist = dist_y + (dist_uv >> 2); memset(ctx->txfm_rd_diff, 0, sizeof(x->sb32_context[xd->sb_index].txfm_rd_diff)); + xd->mode_info_context->mbmi.mode = mode; + xd->mode_info_context->mbmi.txfm_size = txfm_size; + } else if (bsize == BLOCK_SIZE_SB8X8 && err4x4 < err) { + *returnrate = rate4x4_y + rate_uv + + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); + *returndist = dist4x4_y + (dist_uv >> 2); + for (i = 0; i < NB_TXFM_MODES; i++) { + ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]); + } + xd->mode_info_context->mbmi.txfm_size = TX_4X4; } else { *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); *returndist = dist_y + (dist_uv >> 2); for (i = 0; i < NB_TXFM_MODES; i++) { - ctx->txfm_rd_diff[i] = err - txfm_cache[i]; + ctx->txfm_rd_diff[i] = MIN(err4x4, err - txfm_cache[i]); } + xd->mode_info_context->mbmi.txfm_size = txfm_size; + xd->mode_info_context->mbmi.mode = mode; } vpx_memcpy(&ctx->mic, xd->mode_info_context, sizeof(MODE_INFO)); } -void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, - int *returnrate, int *returndist) { - VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int64_t error4x4, error16x16; - int rate4x4, rate16x16 = 0, rateuv[2]; - int dist4x4 = 0, dist16x16 = 0, distuv[2]; - int rate; - int rate4x4_tokenonly = 0; - int rate16x16_tokenonly = 0; - int rateuv_tokenonly[2]; - int64_t error8x8; - int rate8x8_tokenonly=0; - int rate8x8, dist8x8; - int mode16x16; - int mode8x8[4]; - int dist; - int modeuv[2], uv_intra_skippable[2]; - int y_intra16x16_skippable = 0; - int64_t txfm_cache[2][NB_TXFM_MODES]; - TX_SIZE txfm_size_16x16, txfm_size_8x8; - int i; - - x->mb_context[xd->sb_index][xd->mb_index].skip = 0; - mbmi->ref_frame = INTRA_FRAME; - mbmi->mode = DC_PRED; - for (i = 0; i <= TX_8X8; i++) { - mbmi->txfm_size = i; - rd_pick_intra_sbuv_mode(cpi, x, &rateuv[i], &rateuv_tokenonly[i], - &distuv[i], &uv_intra_skippable[i], - BLOCK_SIZE_MB16X16); - modeuv[i] = mbmi->uv_mode; - } - - // current macroblock under rate-distortion optimization test loop - error16x16 = rd_pick_intra_sby_mode(cpi, x, &rate16x16, - &rate16x16_tokenonly, &dist16x16, - &y_intra16x16_skippable, - BLOCK_SIZE_MB16X16, txfm_cache[1]); - mode16x16 = mbmi->mode; - txfm_size_16x16 = mbmi->txfm_size; - if (y_intra16x16_skippable && - ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) || - (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) { - error16x16 -= RDCOST(x->rdmult, x->rddiv, rate16x16_tokenonly, 0); - rate16x16 -= rate16x16_tokenonly; - } - for (i = 0; i < NB_TXFM_MODES; i++) { - txfm_cache[0][i] = error16x16 - txfm_cache[1][cm->txfm_mode] + - txfm_cache[1][i]; - } - - error8x8 = rd_pick_intra8x8mby_modes_and_txsz(cpi, x, &rate8x8, - &rate8x8_tokenonly, - &dist8x8, mode8x8, - error16x16, txfm_cache[1]); - txfm_size_8x8 = mbmi->txfm_size; - for (i = 0; i < NB_TXFM_MODES; i++) { - int64_t tmp_rd = error8x8 - txfm_cache[1][cm->txfm_mode] + txfm_cache[1][i]; - if (tmp_rd < txfm_cache[0][i]) - txfm_cache[0][i] = tmp_rd; - } - - mbmi->txfm_size = TX_4X4; - error4x4 = rd_pick_intra4x4mby_modes(cpi, x, - &rate4x4, &rate4x4_tokenonly, - &dist4x4, error16x16); - for (i = 0; i < NB_TXFM_MODES; i++) { - if (error4x4 < txfm_cache[0][i]) - txfm_cache[0][i] = error4x4; - } - - mbmi->mb_skip_coeff = 0; - if (y_intra16x16_skippable && - ((cm->txfm_mode == ONLY_4X4 && uv_intra_skippable[TX_4X4]) || - (cm->txfm_mode != ONLY_4X4 && uv_intra_skippable[TX_8X8]))) { - mbmi->mb_skip_coeff = 1; - mbmi->mode = mode16x16; - mbmi->uv_mode = modeuv[cm->txfm_mode != ONLY_4X4]; - rate = rate16x16 + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1); - dist = dist16x16; - rate += rateuv[cm->txfm_mode != ONLY_4X4] - - rateuv_tokenonly[cm->txfm_mode != ONLY_4X4]; - dist += (distuv[cm->txfm_mode != ONLY_4X4] >> 2); - mbmi->txfm_size = txfm_size_16x16; - } else if (error8x8 > error16x16) { - if (error4x4 < error16x16) { - rate = rateuv[TX_4X4] + rate4x4; - mbmi->mode = I4X4_PRED; - mbmi->txfm_size = TX_4X4; - dist = dist4x4 + (distuv[TX_4X4] >> 2); - mbmi->uv_mode = modeuv[TX_4X4]; - } else { - mbmi->txfm_size = txfm_size_16x16; - mbmi->mode = mode16x16; - rate = rate16x16 + rateuv[mbmi->txfm_size != TX_4X4]; - dist = dist16x16 + (distuv[mbmi->txfm_size != TX_4X4] >> 2); - mbmi->uv_mode = modeuv[mbmi->txfm_size != TX_4X4]; - } - rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); - } else { - if (error4x4 < error8x8) { - rate = rateuv[TX_4X4] + rate4x4; - mbmi->mode = I4X4_PRED; - mbmi->txfm_size = TX_4X4; - dist = dist4x4 + (distuv[TX_4X4] >> 2); - mbmi->uv_mode = modeuv[TX_4X4]; - } else { - mbmi->mode = I8X8_PRED; - mbmi->txfm_size = txfm_size_8x8; - set_i8x8_block_modes(x, mode8x8); - rate = rate8x8 + rateuv[TX_4X4]; - dist = dist8x8 + (distuv[TX_4X4] >> 2); - } - rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0); - } - - for (i = 0; i < NB_TXFM_MODES; i++) { - x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] = - txfm_cache[0][cm->txfm_mode] - txfm_cache[0][i]; - } - - *returnrate = rate; - *returndist = dist; -} - int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int *returnrate, @@ -4272,7 +2468,18 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, unsigned int mode_mask = 0; int64_t mode_distortions[MB_MODE_COUNT] = {-1}; int64_t frame_distortions[MAX_REF_FRAMES] = {-1}; + int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex, + cpi->common.y_dc_delta_q); + int_mv seg_mvs[4][MAX_REF_FRAMES - 1]; + union b_mode_info best_bmodes[4]; + PARTITION_INFO best_partition; + + for (i = 0; i < 4; i++) { + int j; + for (j = 0; j < MAX_REF_FRAMES - 1; j++) + seg_mvs[i][j].as_int = INVALID_MV; + } // Everywhere the flag is set the error is much higher than its neighbors. ctx->frames_with_high_error = 0; ctx->modes_with_high_error = 0; @@ -4330,7 +2537,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->Speed == 0 || (cpi->Speed > 0 && (ref_frame_mask & (1 << INTRA_FRAME)))) { mbmi->mode = DC_PRED; - for (i = 0; i <= ((bsize < BLOCK_SIZE_SB64X64) ? TX_16X16 : TX_32X32); + for (i = 0; i <= (bsize < BLOCK_SIZE_MB16X16 ? TX_4X4 : + (bsize < BLOCK_SIZE_SB32X32 ? TX_8X8 : + (bsize < BLOCK_SIZE_SB64X64 ? TX_16X16 : TX_32X32))); i++) { mbmi->txfm_size = i; rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_intra[i], &rate_uv_tokenonly[i], @@ -4362,6 +2571,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->skip = 0; this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame; + if (!(ref_frame == INTRA_FRAME || (cpi->ref_frame_flags & flag_list[ref_frame]))) { continue; @@ -4382,6 +2592,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame = ref_frame; mbmi->second_ref_frame = vp9_mode_order[mode_index].second_ref_frame; + + // TODO(jingning, jkoleszar): scaling reference frame not supported for + // SPLITMV. + if (mbmi->ref_frame > 0 && + (yv12_mb[mbmi->ref_frame].y_width != cm->mb_cols * 16 || + yv12_mb[mbmi->ref_frame].y_height != cm->mb_rows * 16) && + this_mode == SPLITMV) + continue; + + if (mbmi->second_ref_frame > 0 && + (yv12_mb[mbmi->second_ref_frame].y_width != cm->mb_cols * 16 || + yv12_mb[mbmi->second_ref_frame].y_height != cm->mb_rows * 16) && + this_mode == SPLITMV) + continue; + set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, scale_factor); comp_pred = mbmi->second_ref_frame > INTRA_FRAME; @@ -4400,9 +2625,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // if (!(cpi->ref_frame_flags & flag_list[ref_frame])) // continue; - if (this_mode == I8X8_PRED || - this_mode == I4X4_PRED || - this_mode == SPLITMV) + if (bsize != BLOCK_SIZE_SB8X8 && + (this_mode == I4X4_PRED || this_mode == SPLITMV)) continue; // if (vp9_mode_order[mode_index].second_ref_frame == INTRA_FRAME) // continue; @@ -4465,13 +2689,33 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } } - if (ref_frame == INTRA_FRAME) { + if (this_mode == I4X4_PRED) { + int rate; + + // Note the rate value returned here includes the cost of coding + // the I4X4_PRED mode : x->mbmode_cost[xd->frame_type][I4X4_PRED]; + assert(bsize == BLOCK_SIZE_SB8X8); + mbmi->txfm_size = TX_4X4; + rd_pick_intra4x4mby_modes(cpi, x, &rate, &rate_y, + &distortion_y, INT64_MAX); + rate2 += rate; + rate2 += intra_cost_penalty; + distortion2 += distortion_y; + + rate2 += rate_uv_intra[TX_4X4]; + rate_uv = rate_uv_intra[TX_4X4]; + distortion2 += dist_uv[TX_4X4]; + distortion_uv = dist_uv[TX_4X4]; + mbmi->uv_mode = mode_uv[TX_4X4]; + } else if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; vp9_build_intra_predictors_sby_s(xd, bsize); super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, bsize, txfm_cache); uv_tx = mbmi->txfm_size; + if (bsize < BLOCK_SIZE_MB16X16 && uv_tx == TX_8X8) + uv_tx = TX_4X4; if (bsize < BLOCK_SIZE_SB32X32 && uv_tx == TX_16X16) uv_tx = TX_8X8; else if (bsize < BLOCK_SIZE_SB64X64 && uv_tx == TX_32X32) @@ -4483,7 +2727,137 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->uv_mode = mode_uv[uv_tx]; rate2 = rate_y + x->mbmode_cost[cm->frame_type][mbmi->mode] + rate_uv; + if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) + rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; + } else if (this_mode == SPLITMV) { + const int is_comp_pred = mbmi->second_ref_frame > 0; + int rate, distortion; + int64_t this_rd_thresh; + int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; + int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; + int tmp_best_distortion = INT_MAX, tmp_best_skippable = 0; + int switchable_filter_index; + int_mv *second_ref = is_comp_pred ? + &mbmi->ref_mvs[mbmi->second_ref_frame][0] : NULL; + union b_mode_info tmp_best_bmodes[16]; + MB_MODE_INFO tmp_best_mbmode; + PARTITION_INFO tmp_best_partition; + int pred_exists = 0; + int uv_skippable; + + this_rd_thresh = (mbmi->ref_frame == LAST_FRAME) ? + cpi->rd_threshes[THR_NEWMV] : cpi->rd_threshes[THR_NEWA]; + this_rd_thresh = (mbmi->ref_frame == GOLDEN_FRAME) ? + cpi->rd_threshes[THR_NEWG] : this_rd_thresh; + xd->mode_info_context->mbmi.txfm_size = TX_4X4; + + for (switchable_filter_index = 0; + switchable_filter_index < VP9_SWITCHABLE_FILTERS; + ++switchable_filter_index) { + int newbest; + mbmi->interp_filter = + vp9_switchable_interp[switchable_filter_index]; + vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + + tmp_rd = rd_pick_best_mbsegmentation(cpi, x, + &mbmi->ref_mvs[mbmi->ref_frame][0], + second_ref, INT64_MAX, mdcounts, + &rate, &rate_y, &distortion, + &skippable, + (int)this_rd_thresh, seg_mvs); + if (cpi->common.mcomp_filter_type == SWITCHABLE) { + int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs + [vp9_get_pred_context(&cpi->common, xd, + PRED_SWITCHABLE_INTERP)] + [vp9_switchable_interp_map[mbmi->interp_filter]]; + tmp_rd += RDCOST(x->rdmult, x->rddiv, rs, 0); + } + newbest = (tmp_rd < tmp_best_rd); + if (newbest) { + tmp_best_filter = mbmi->interp_filter; + tmp_best_rd = tmp_rd; + } + if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || + (mbmi->interp_filter == cm->mcomp_filter_type && + cm->mcomp_filter_type != SWITCHABLE)) { + tmp_best_rdu = tmp_rd; + tmp_best_rate = rate; + tmp_best_ratey = rate_y; + tmp_best_distortion = distortion; + tmp_best_skippable = skippable; + vpx_memcpy(&tmp_best_mbmode, mbmi, sizeof(MB_MODE_INFO)); + vpx_memcpy(&tmp_best_partition, x->partition_info, + sizeof(PARTITION_INFO)); + for (i = 0; i < 4; i++) { + tmp_best_bmodes[i] = xd->mode_info_context->bmi[i]; + } + pred_exists = 1; + } + } // switchable_filter_index loop + + mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? + tmp_best_filter : cm->mcomp_filter_type); + vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + if (!pred_exists) { + // Handles the special case when a filter that is not in the + // switchable list (bilinear, 6-tap) is indicated at the frame level + tmp_rd = rd_pick_best_mbsegmentation(cpi, x, + &mbmi->ref_mvs[mbmi->ref_frame][0], + second_ref, INT64_MAX, mdcounts, + &rate, &rate_y, &distortion, + &skippable, + (int)this_rd_thresh, seg_mvs); + } else { + if (cpi->common.mcomp_filter_type == SWITCHABLE) { + int rs = SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs + [vp9_get_pred_context(&cpi->common, xd, + PRED_SWITCHABLE_INTERP)] + [vp9_switchable_interp_map[mbmi->interp_filter]]; + tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0); + } + tmp_rd = tmp_best_rdu; + rate = tmp_best_rate; + rate_y = tmp_best_ratey; + distortion = tmp_best_distortion; + skippable = tmp_best_skippable; + vpx_memcpy(mbmi, &tmp_best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(x->partition_info, &tmp_best_partition, + sizeof(PARTITION_INFO)); + for (i = 0; i < 4; i++) { + xd->mode_info_context->bmi[i] = tmp_best_bmodes[i]; + } + } + + rate2 += rate; + distortion2 += distortion; + + if (cpi->common.mcomp_filter_type == SWITCHABLE) + rate2 += SWITCHABLE_INTERP_RATE_FACTOR * x->switchable_interp_costs + [vp9_get_pred_context(&cpi->common, xd, PRED_SWITCHABLE_INTERP)] + [vp9_switchable_interp_map[mbmi->interp_filter]]; + + // If even the 'Y' rd value of split is higher than best so far + // then dont bother looking at UV + vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, + bsize); + vp9_subtract_sbuv(x, bsize); + super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv, + &uv_skippable, bsize, TX_4X4); + rate2 += rate_uv; + distortion2 += distortion_uv; + skippable = skippable && uv_skippable; + + if (!mode_excluded) { + if (is_comp_pred) + mode_excluded = cpi->common.comp_pred_mode == SINGLE_PREDICTION_ONLY; + else + mode_excluded = cpi->common.comp_pred_mode == COMP_PREDICTION_ONLY; + } + + compmode_cost = + vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_COMP), is_comp_pred); + mbmi->mode = this_mode; } else { YV12_BUFFER_CONFIG *scaled_ref_frame = NULL; int fb; @@ -4640,6 +3014,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, *returndistortion = distortion2; best_rd = this_rd; vpx_memcpy(&best_mbmode, mbmi, sizeof(MB_MODE_INFO)); + vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO)); + + if (this_mode == I4X4_PRED || this_mode == SPLITMV) { + for (i = 0; i < 4; i++) { + best_bmodes[i] = xd->mode_info_context->bmi[i]; + } + } } #if 0 // Testing this mode gave rise to an improvement in best error score. @@ -4693,6 +3074,14 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, } /* keep record of best txfm size */ + if (bsize < BLOCK_SIZE_SB32X32) { + if (bsize < BLOCK_SIZE_MB16X16) { + if (this_mode == SPLITMV || this_mode == I4X4_PRED) + txfm_cache[ALLOW_8X8] = txfm_cache[ONLY_4X4]; + txfm_cache[ALLOW_16X16] = txfm_cache[ALLOW_8X8]; + } + txfm_cache[ALLOW_32X32] = txfm_cache[ALLOW_16X16]; + } if (!mode_excluded && this_rd != INT64_MAX) { for (i = 0; i < NB_TXFM_MODES; i++) { int64_t adj_rd; @@ -4769,13 +3158,18 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { mbmi->mode = ZEROMV; mbmi->ref_frame = ALTREF_FRAME; - mbmi->second_ref_frame = INTRA_FRAME; + mbmi->second_ref_frame = NONE; mbmi->mv[0].as_int = 0; mbmi->uv_mode = DC_PRED; mbmi->mb_skip_coeff = 1; - mbmi->partitioning = 0; - mbmi->txfm_size = cm->txfm_mode == TX_MODE_SELECT ? - TX_32X32 : cm->txfm_mode; + if (cm->txfm_mode == TX_MODE_SELECT) { + if (bsize >= BLOCK_SIZE_SB32X32) + mbmi->txfm_size = TX_32X32; + else if (bsize >= BLOCK_SIZE_MB16X16) + mbmi->txfm_size = TX_16X16; + else + mbmi->txfm_size = TX_8X8; + } vpx_memset(best_txfm_diff, 0, sizeof(best_txfm_diff)); vpx_memset(best_pred_diff, 0, sizeof(best_pred_diff)); @@ -4784,6 +3178,26 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // macroblock modes vpx_memcpy(mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + if (best_mbmode.mode == I4X4_PRED) { + for (i = 0; i < 4; i++) { + xd->mode_info_context->bmi[i].as_mode = best_bmodes[i].as_mode; + } + } + + if (best_mbmode.mode == SPLITMV) { + for (i = 0; i < 4; i++) + xd->mode_info_context->bmi[i].as_mv[0].as_int = + best_bmodes[i].as_mv[0].as_int; + if (mbmi->second_ref_frame > 0) + for (i = 0; i < 4; i++) + xd->mode_info_context->bmi[i].as_mv[1].as_int = + best_bmodes[i].as_mv[1].as_int; + + vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); + + mbmi->mv[0].as_int = x->partition_info->bmi[3].mv.as_int; + mbmi->mv[1].as_int = x->partition_info->bmi[3].second_mv.as_int; + } for (i = 0; i < NB_PREDICTION_TYPES; ++i) { if (best_pred_rd[i] == INT64_MAX) @@ -4806,7 +3220,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, end: set_scale_factors(xd, mbmi->ref_frame, mbmi->second_ref_frame, scale_factor); - store_coding_context(x, ctx, best_mode_index, NULL, + store_coding_context(x, ctx, best_mode_index, + &best_partition, &mbmi->ref_mvs[mbmi->ref_frame][0], &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 : mbmi->second_ref_frame][0], @@ -4814,41 +3229,3 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } - -void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - int mi_row, int mi_col, - int *totalrate, int *totaldist) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; - int rate, distortion; - int64_t intra_error = 0; - unsigned char *segment_id = &mbmi->segment_id; - - if (xd->segmentation_enabled) - x->encode_breakout = cpi->segment_encode_breakout[*segment_id]; - else - x->encode_breakout = cpi->oxcf.encode_breakout; - - // if (cpi->sf.RD) - // For now this codebase is limited to a single rd encode path - { - int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled; - - rd_pick_inter_mode(cpi, x, mi_row, mi_col, &rate, - &distortion, &intra_error); - - /* restore cpi->zbin_mode_boost_enabled */ - cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled; - } - // else - // The non rd encode path has been deleted from this code base - // to simplify development - // vp9_pick_inter_mode - - // Store metrics so they can be added in to totals if this mode is picked - x->mb_context[xd->sb_index][xd->mb_index].distortion = distortion; - x->mb_context[xd->sb_index][xd->mb_index].intra_error = intra_error; - - *totalrate = rate; - *totaldist = distortion; -} diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index eef2a4fe9..dcf5d00e9 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -19,17 +19,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex); void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); -void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, - int *r, int *d); - void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int *r, int *d, BLOCK_SIZE_TYPE bsize, PICK_MODE_CONTEXT *ctx); -void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - int mi_row, int mi_col, - int *r, int *d); - int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int *r, int *d, BLOCK_SIZE_TYPE bsize, diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index 7f792ae2b..fe995ad72 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -16,18 +16,15 @@ #include "vp9/common/vp9_tile_common.h" void vp9_enable_segmentation(VP9_PTR ptr) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); + VP9_COMP *cpi = (VP9_COMP *)ptr; - // Set the appropriate feature bit cpi->mb.e_mbd.segmentation_enabled = 1; cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; } void vp9_disable_segmentation(VP9_PTR ptr) { - VP9_COMP *cpi = (VP9_COMP *)(ptr); - - // Clear the appropriate feature bit + VP9_COMP *cpi = (VP9_COMP *)ptr; cpi->mb.e_mbd.segmentation_enabled = 0; } @@ -60,61 +57,57 @@ void vp9_set_segment_data(VP9_PTR ptr, } // Based on set of segment counts calculate a probability tree -static void calc_segtree_probs(MACROBLOCKD *xd, - int *segcounts, +static void calc_segtree_probs(MACROBLOCKD *xd, int *segcounts, vp9_prob *segment_tree_probs) { // Work out probabilities of each segment - segment_tree_probs[0] = - get_binary_prob(segcounts[0] + segcounts[1] + segcounts[2] + segcounts[3], - segcounts[4] + segcounts[5] + segcounts[6] + segcounts[7]); - segment_tree_probs[1] = - get_binary_prob(segcounts[0] + segcounts[1], segcounts[2] + segcounts[3]); - segment_tree_probs[2] = get_binary_prob(segcounts[0], segcounts[1]); - segment_tree_probs[3] = get_binary_prob(segcounts[2], segcounts[3]); - segment_tree_probs[4] = - get_binary_prob(segcounts[4] + segcounts[5], segcounts[6] + segcounts[7]); + const int c01 = segcounts[0] + segcounts[1]; + const int c23 = segcounts[2] + segcounts[3]; + const int c45 = segcounts[4] + segcounts[5]; + const int c67 = segcounts[6] + segcounts[7]; + + segment_tree_probs[0] = get_binary_prob(c01 + c23, c45 + c67); + segment_tree_probs[1] = get_binary_prob(c01, c23); + segment_tree_probs[2] = get_binary_prob(c45, c67); + segment_tree_probs[3] = get_binary_prob(segcounts[0], segcounts[1]); + segment_tree_probs[4] = get_binary_prob(segcounts[2], segcounts[3]); segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]); segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]); } // Based on set of segment counts and probabilities calculate a cost estimate -static int cost_segmap(MACROBLOCKD *xd, - int *segcounts, - vp9_prob *probs) { - int cost; - int count1, count2; +static int cost_segmap(MACROBLOCKD *xd, int *segcounts, vp9_prob *probs) { + const int c01 = segcounts[0] + segcounts[1]; + const int c23 = segcounts[2] + segcounts[3]; + const int c45 = segcounts[4] + segcounts[5]; + const int c67 = segcounts[6] + segcounts[7]; + const int c0123 = c01 + c23; + const int c4567 = c45 + c67; // Cost the top node of the tree - count1 = segcounts[0] + segcounts[1] + segcounts[2] + segcounts[3]; - count2 = segcounts[3] + segcounts[4] + segcounts[5] + segcounts[6]; - cost = count1 * vp9_cost_zero(probs[0]) + - count2 * vp9_cost_one(probs[0]); + int cost = c0123 * vp9_cost_zero(probs[0]) + + c4567 * vp9_cost_one(probs[0]); // Cost subsequent levels - if (count1 > 0) { - count1 = segcounts[0] + segcounts[1]; - count2 = segcounts[2] + segcounts[3]; - cost += count1 * vp9_cost_zero(probs[1]) + - count2 * vp9_cost_one(probs[1]); - - if (count1 > 0) - cost += segcounts[0] * vp9_cost_zero(probs[2]) + - segcounts[1] * vp9_cost_one(probs[2]); - if (count2 > 0) - cost += segcounts[2] * vp9_cost_zero(probs[3]) + - segcounts[3] * vp9_cost_one(probs[3]); + if (c0123 > 0) { + cost += c01 * vp9_cost_zero(probs[1]) + + c23 * vp9_cost_one(probs[1]); + + if (c01 > 0) + cost += segcounts[0] * vp9_cost_zero(probs[3]) + + segcounts[1] * vp9_cost_one(probs[3]); + if (c23 > 0) + cost += segcounts[2] * vp9_cost_zero(probs[4]) + + segcounts[3] * vp9_cost_one(probs[4]); } - if (count2 > 0) { - count1 = segcounts[4] + segcounts[5]; - count2 = segcounts[6] + segcounts[7]; - cost += count1 * vp9_cost_zero(probs[4]) + - count2 * vp9_cost_one(probs[4]); + if (c4567 > 0) { + cost += c45 * vp9_cost_zero(probs[2]) + + c67 * vp9_cost_one(probs[2]); - if (count1 > 0) + if (c45 > 0) cost += segcounts[4] * vp9_cost_zero(probs[5]) + segcounts[5] * vp9_cost_one(probs[5]); - if (count2 > 0) + if (c67 > 0) cost += segcounts[6] * vp9_cost_zero(probs[6]) + segcounts[7] * vp9_cost_one(probs[6]); } @@ -130,11 +123,12 @@ static void count_segs(VP9_COMP *cpi, int bw, int bh, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - const int segment_id = mi->mbmi.segment_id; + int segment_id; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; + segment_id = mi->mbmi.segment_id; xd->mode_info_context = mi; set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw); @@ -199,9 +193,11 @@ static void count_segs_sb(VP9_COMP *cpi, MODE_INFO *mi, assert(bwl < bsl && bhl < bsl); if (bsize == BLOCK_SIZE_SB64X64) { subsize = BLOCK_SIZE_SB32X32; - } else { - assert(bsize == BLOCK_SIZE_SB32X32); + } else if (bsize == BLOCK_SIZE_SB32X32) { subsize = BLOCK_SIZE_MB16X16; + } else { + assert(bsize == BLOCK_SIZE_MB16X16); + subsize = BLOCK_SIZE_SB8X8; } for (n = 0; n < 4; n++) { @@ -238,10 +234,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // Set default state for the segment tree probabilities and the // temporal coding probabilities - vpx_memset(xd->mb_segment_tree_probs, 255, - sizeof(xd->mb_segment_tree_probs)); - vpx_memset(cm->segment_pred_probs, 255, - sizeof(cm->segment_pred_probs)); + vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); + vpx_memset(cm->segment_pred_probs, 255, sizeof(cm->segment_pred_probs)); vpx_memset(no_pred_segcounts, 0, sizeof(no_pred_segcounts)); vpx_memset(t_unpred_seg_counts, 0, sizeof(t_unpred_seg_counts)); @@ -249,16 +243,15 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // First of all generate stats regarding how well the last segment map // predicts this one - for (tile_col = 0; tile_col < cm->tile_columns; tile_col++) { vp9_get_tile_col_offsets(cm, tile_col); mi_ptr = cm->mi + cm->cur_tile_mi_col_start; for (mi_row = 0; mi_row < cm->mi_rows; - mi_row += (4 << CONFIG_SB8X8), mi_ptr += (4 << CONFIG_SB8X8) * mis) { + mi_row += 8, mi_ptr += 8 * mis) { mi = mi_ptr; for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; - mi_col += (4 << CONFIG_SB8X8), mi += (4 << CONFIG_SB8X8)) { + mi_col += 8, mi += 8) { count_segs_sb(cpi, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, mi_row, mi_col, BLOCK_SIZE_SB64X64); } @@ -279,27 +272,24 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) { // Add in the cost of the signalling for each prediction context for (i = 0; i < PREDICTION_PROBS; i++) { - t_nopred_prob[i] = get_binary_prob(temporal_predictor_count[i][0], - temporal_predictor_count[i][1]); + const int count0 = temporal_predictor_count[i][0]; + const int count1 = temporal_predictor_count[i][1]; + + t_nopred_prob[i] = get_binary_prob(count0, count1); // Add in the predictor signaling cost - t_pred_cost += (temporal_predictor_count[i][0] * - vp9_cost_zero(t_nopred_prob[i])) + - (temporal_predictor_count[i][1] * - vp9_cost_one(t_nopred_prob[i])); + t_pred_cost += count0 * vp9_cost_zero(t_nopred_prob[i]) + + count1 * vp9_cost_one(t_nopred_prob[i]); } } // Now choose which coding method to use. if (t_pred_cost < no_pred_cost) { cm->temporal_update = 1; - vpx_memcpy(xd->mb_segment_tree_probs, - t_pred_tree, sizeof(t_pred_tree)); - vpx_memcpy(&cm->segment_pred_probs, - t_nopred_prob, sizeof(t_nopred_prob)); + vpx_memcpy(xd->mb_segment_tree_probs, t_pred_tree, sizeof(t_pred_tree)); + vpx_memcpy(cm->segment_pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); } else { cm->temporal_update = 0; - vpx_memcpy(xd->mb_segment_tree_probs, - no_pred_tree, sizeof(no_pred_tree)); + vpx_memcpy(xd->mb_segment_tree_probs, no_pred_tree, sizeof(no_pred_tree)); } } diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 1e6b9840b..6bd8b5036 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -481,7 +481,7 @@ void configure_arnr_filter(VP9_COMP *cpi, const unsigned int this_frame, // Note: this_frame->frame has been updated in the loop // so it now points at the ARF frame. half_gf_int = cpi->baseline_gf_interval >> 1; - frames_after_arf = (int)(cpi->twopass.total_stats->count - this_frame - 1); + frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame - 1); switch (cpi->oxcf.arnr_type) { case 1: // Backward filter diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 3c3367071..9a6598581 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -112,8 +112,6 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, PLANE_TYPE type = plane ? PLANE_TYPE_UV : PLANE_TYPE_Y_WITH_DC; TX_SIZE tx_size = ss_txfrm_size / 2; int dry_run = args->dry_run; - int ib = old_block_idx_4x4(xd, b_width_log2(bsize) + b_height_log2(bsize), - plane, block); MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; int pt; /* near block/prev token context index */ @@ -158,7 +156,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, default: case TX_4X4: { tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_4x4(xd, ib) : DCT_DCT; + get_tx_type_4x4(xd, block) : DCT_DCT; above_ec = A[0] != 0; left_ec = L[0] != 0; seg_eob = 16; @@ -173,7 +171,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, } case TX_8X8: { const int sz = 1 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; + const int x = block & ((1 << sz) - 1), y = block - x; tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; above_ec = (A[0] + A[1]) != 0; @@ -190,7 +188,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize, } case TX_16X16: { const int sz = 2 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; + const int x = block & ((1 << sz) - 1), y = block - x; tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; above_ec = (A[0] + A[1] + A[2] + A[3]) != 0; @@ -376,7 +374,8 @@ int vp9_sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { int result = 1; struct is_skippable_args args = {xd, &result}; - foreach_transformed_block_in_plane(xd, bsize, 0, 0, is_skippable, &args); + foreach_transformed_block_in_plane(xd, bsize, 0, + is_skippable, &args); return result; } diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index c4c70df43..c2a600408 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -239,6 +239,32 @@ unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, return (var - (((unsigned int)avg * avg) >> 6)); } +unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 5)); +} + +unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + unsigned int *sse) { + unsigned int var; + int avg; + + variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg); + *sse = var; + return (var - (((unsigned int)avg * avg) >> 5)); +} + unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, diff --git a/vp9/encoder/x86/vp9_quantize_mmx.asm b/vp9/encoder/x86/vp9_quantize_mmx.asm deleted file mode 100644 index 22e235610..000000000 --- a/vp9/encoder/x86/vp9_quantize_mmx.asm +++ /dev/null @@ -1,286 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - -;int vp9_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, -; short *qcoeff_ptr,short *dequant_ptr, -; short *scan_mask, short *round_ptr, -; short *quant_ptr, short *dqcoeff_ptr); -global sym(vp9_fast_quantize_b_impl_mmx) PRIVATE -sym(vp9_fast_quantize_b_impl_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - movq mm0, [rsi] - - mov rax, arg(1) ;zbin_ptr - movq mm1, [rax] - - movq mm3, mm0 - psraw mm0, 15 - - pxor mm3, mm0 - psubw mm3, mm0 ; abs - - movq mm2, mm3 - pcmpgtw mm1, mm2 - - pandn mm1, mm2 - movq mm3, mm1 - - mov rdx, arg(6) ;quant_ptr - movq mm1, [rdx] - - mov rcx, arg(5) ;round_ptr - movq mm2, [rcx] - - paddw mm3, mm2 - pmulhuw mm3, mm1 - - pxor mm3, mm0 - psubw mm3, mm0 ;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - movq mm0, mm3 - - movq [rdi], mm3 - - mov rax, arg(3) ;dequant_ptr - movq mm2, [rax] - - pmullw mm3, mm2 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax], mm3 - - ; next 8 - movq mm4, [rsi+8] - - mov rax, arg(1) ;zbin_ptr - movq mm5, [rax+8] - - movq mm7, mm4 - psraw mm4, 15 - - pxor mm7, mm4 - psubw mm7, mm4 ; abs - - movq mm6, mm7 - pcmpgtw mm5, mm6 - - pandn mm5, mm6 - movq mm7, mm5 - - movq mm5, [rdx+8] - movq mm6, [rcx+8] - - paddw mm7, mm6 - pmulhuw mm7, mm5 - - pxor mm7, mm4 - psubw mm7, mm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - - movq mm1, mm7 - movq [rdi+8], mm7 - - mov rax, arg(3) ;dequant_ptr - movq mm6, [rax+8] - - pmullw mm7, mm6 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax+8], mm7 - - - ; next 8 - movq mm4, [rsi+16] - - mov rax, arg(1) ;zbin_ptr - movq mm5, [rax+16] - - movq mm7, mm4 - psraw mm4, 15 - - pxor mm7, mm4 - psubw mm7, mm4 ; abs - - movq mm6, mm7 - pcmpgtw mm5, mm6 - - pandn mm5, mm6 - movq mm7, mm5 - - movq mm5, [rdx+16] - movq mm6, [rcx+16] - - paddw mm7, mm6 - pmulhuw mm7, mm5 - - pxor mm7, mm4 - psubw mm7, mm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - - movq mm1, mm7 - movq [rdi+16], mm7 - - mov rax, arg(3) ;dequant_ptr - movq mm6, [rax+16] - - pmullw mm7, mm6 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax+16], mm7 - - - ; next 8 - movq mm4, [rsi+24] - - mov rax, arg(1) ;zbin_ptr - movq mm5, [rax+24] - - movq mm7, mm4 - psraw mm4, 15 - - pxor mm7, mm4 - psubw mm7, mm4 ; abs - - movq mm6, mm7 - pcmpgtw mm5, mm6 - - pandn mm5, mm6 - movq mm7, mm5 - - movq mm5, [rdx+24] - movq mm6, [rcx+24] - - paddw mm7, mm6 - pmulhuw mm7, mm5 - - pxor mm7, mm4 - psubw mm7, mm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - - movq mm1, mm7 - movq [rdi+24], mm7 - - mov rax, arg(3) ;dequant_ptr - movq mm6, [rax+24] - - pmullw mm7, mm6 - mov rax, arg(7) ;dqcoeff_ptr - - movq [rax+24], mm7 - - - - mov rdi, arg(4) ;scan_mask - mov rsi, arg(2) ;qcoeff_ptr - - pxor mm5, mm5 - pxor mm7, mm7 - - movq mm0, [rsi] - movq mm1, [rsi+8] - - movq mm2, [rdi] - movq mm3, [rdi+8]; - - pcmpeqw mm0, mm7 - pcmpeqw mm1, mm7 - - pcmpeqw mm6, mm6 - pxor mm0, mm6 - - pxor mm1, mm6 - psrlw mm0, 15 - - psrlw mm1, 15 - pmaddwd mm0, mm2 - - pmaddwd mm1, mm3 - movq mm5, mm0 - - paddd mm5, mm1 - - movq mm0, [rsi+16] - movq mm1, [rsi+24] - - movq mm2, [rdi+16] - movq mm3, [rdi+24]; - - pcmpeqw mm0, mm7 - pcmpeqw mm1, mm7 - - pcmpeqw mm6, mm6 - pxor mm0, mm6 - - pxor mm1, mm6 - psrlw mm0, 15 - - psrlw mm1, 15 - pmaddwd mm0, mm2 - - pmaddwd mm1, mm3 - paddd mm5, mm0 - - paddd mm5, mm1 - movq mm0, mm5 - - psrlq mm5, 32 - paddd mm0, mm5 - - ; eob adjustment begins here - movq rcx, mm0 - and rcx, 0xffff - - xor rdx, rdx - sub rdx, rcx ; rdx=-rcx - - bsr rax, rcx - inc rax - - sar rdx, 31 - and rax, rdx - ; Substitute the sse assembly for the old mmx mixed assembly/C. The - ; following is kept as reference - ; movq rcx, mm0 - ; bsr rax, rcx - ; - ; mov eob, rax - ; mov eee, rcx - ; - ;if(eee==0) - ;{ - ; eob=-1; - ;} - ;else if(eee<0) - ;{ - ; eob=15; - ;} - ;d->eob = eob+1; - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp9/encoder/x86/vp9_quantize_sse2.asm b/vp9/encoder/x86/vp9_quantize_sse2.asm deleted file mode 100644 index 700e64b1f..000000000 --- a/vp9/encoder/x86/vp9_quantize_sse2.asm +++ /dev/null @@ -1,379 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -; void vp9_regular_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp9_regular_quantize_b_sse2) PRIVATE -sym(vp9_regular_quantize_b_sse2): - push rbp - mov rbp, rsp - SAVE_XMM 7 - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %if LIBVPX_YASM_WIN64 - push rdi - push rsi - %endif -%endif - - ALIGN_STACK 16, rax - %define zrun_zbin_boost 0 ; 8 - %define abs_minus_zbin 8 ; 32 - %define temp_qcoeff 40 ; 32 - %define qcoeff 72 ; 32 - %define stack_size 104 - sub rsp, stack_size - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %if LIBVPX_YASM_WIN64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rdx, [rdi + vp9_block_coeff] ; coeff_ptr - mov rcx, [rdi + vp9_block_zbin] ; zbin_ptr - movd xmm7, [rdi + vp9_block_zbin_extra] ; zbin_oq_value - - ; z - movdqa xmm0, [rdx] - movdqa xmm4, [rdx + 16] - mov rdx, [rdi + vp9_block_round] ; round_ptr - - pshuflw xmm7, xmm7, 0 - punpcklwd xmm7, xmm7 ; duplicated zbin_oq_value - - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz - psraw xmm0, 15 - psraw xmm4, 15 - - ; (z ^ sz) - pxor xmm1, xmm0 - pxor xmm5, xmm4 - - ; x = abs(z) - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - movdqa xmm2, [rcx] - movdqa xmm3, [rcx + 16] - mov rcx, [rdi + vp9_block_quant] ; quant_ptr - - ; *zbin_ptr + zbin_oq_value - paddw xmm2, xmm7 - paddw xmm3, xmm7 - - ; x - (*zbin_ptr + zbin_oq_value) - psubw xmm1, xmm2 - psubw xmm5, xmm3 - movdqa [rsp + abs_minus_zbin], xmm1 - movdqa [rsp + abs_minus_zbin + 16], xmm5 - - ; add (zbin_ptr + zbin_oq_value) back - paddw xmm1, xmm2 - paddw xmm5, xmm3 - - movdqa xmm2, [rdx] - movdqa xmm6, [rdx + 16] - - movdqa xmm3, [rcx] - movdqa xmm7, [rcx + 16] - - ; x + round - paddw xmm1, xmm2 - paddw xmm5, xmm6 - - ; y = x * quant_ptr >> 16 - pmulhw xmm3, xmm1 - pmulhw xmm7, xmm5 - - ; y += x - paddw xmm1, xmm3 - paddw xmm5, xmm7 - - movdqa [rsp + temp_qcoeff], xmm1 - movdqa [rsp + temp_qcoeff + 16], xmm5 - - pxor xmm6, xmm6 - ; zero qcoeff - movdqa [rsp + qcoeff], xmm6 - movdqa [rsp + qcoeff + 16], xmm6 - - mov rdx, [rdi + vp9_block_zrun_zbin_boost] ; zbin_boost_ptr - mov rax, [rdi + vp9_block_quant_shift] ; quant_shift_ptr - mov [rsp + zrun_zbin_boost], rdx - -%macro ZIGZAG_LOOP 1 - ; x - movsx ecx, WORD PTR[rsp + abs_minus_zbin + %1 * 2] - - ; if (x >= zbin) - sub cx, WORD PTR[rdx] ; x - zbin - lea rdx, [rdx + 2] ; zbin_boost_ptr++ - jl .rq_zigzag_loop_%1 ; x < zbin - - movsx edi, WORD PTR[rsp + temp_qcoeff + %1 * 2] - - ; downshift by quant_shift[rc] - movsx cx, BYTE PTR[rax + %1] ; quant_shift_ptr[rc] - sar edi, cl ; also sets Z bit - je .rq_zigzag_loop_%1 ; !y - mov WORD PTR[rsp + qcoeff + %1 * 2], di ;qcoeff_ptr[rc] = temp_qcoeff[rc] - mov rdx, [rsp + zrun_zbin_boost] ; reset to b->zrun_zbin_boost -.rq_zigzag_loop_%1: -%endmacro -; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c -ZIGZAG_LOOP 0 -ZIGZAG_LOOP 1 -ZIGZAG_LOOP 4 -ZIGZAG_LOOP 8 -ZIGZAG_LOOP 5 -ZIGZAG_LOOP 2 -ZIGZAG_LOOP 3 -ZIGZAG_LOOP 6 -ZIGZAG_LOOP 9 -ZIGZAG_LOOP 12 -ZIGZAG_LOOP 13 -ZIGZAG_LOOP 10 -ZIGZAG_LOOP 7 -ZIGZAG_LOOP 11 -ZIGZAG_LOOP 14 -ZIGZAG_LOOP 15 - - movdqa xmm2, [rsp + qcoeff] - movdqa xmm3, [rsp + qcoeff + 16] - - mov rcx, [rsi + vp9_blockd_dequant] ; dequant_ptr - mov rdi, [rsi + vp9_blockd_dqcoeff] ; dqcoeff_ptr - - ; y ^ sz - pxor xmm2, xmm0 - pxor xmm3, xmm4 - ; x = (y ^ sz) - sz - psubw xmm2, xmm0 - psubw xmm3, xmm4 - - ; dequant - movdqa xmm0, [rcx] - movdqa xmm1, [rcx + 16] - - mov rcx, [rsi + vp9_blockd_qcoeff] ; qcoeff_ptr - - pmullw xmm0, xmm2 - pmullw xmm1, xmm3 - - movdqa [rcx], xmm2 ; store qcoeff - movdqa [rcx + 16], xmm3 - movdqa [rdi], xmm0 ; store dqcoeff - movdqa [rdi + 16], xmm1 - - ; select the last value (in zig_zag order) for EOB - pcmpeqw xmm2, xmm6 - pcmpeqw xmm3, xmm6 - ; ! - pcmpeqw xmm6, xmm6 - pxor xmm2, xmm6 - pxor xmm3, xmm6 - ; mask inv_zig_zag - pand xmm2, [GLOBAL(inv_zig_zag)] - pand xmm3, [GLOBAL(inv_zig_zag + 16)] - ; select the max value - pmaxsw xmm2, xmm3 - pshufd xmm3, xmm2, 00001110b - pmaxsw xmm2, xmm3 - pshuflw xmm3, xmm2, 00001110b - pmaxsw xmm2, xmm3 - pshuflw xmm3, xmm2, 00000001b - pmaxsw xmm2, xmm3 - movd eax, xmm2 - and eax, 0xff - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog - add rsp, stack_size - pop rsp -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %if LIBVPX_YASM_WIN64 - pop rsi - pop rdi - %endif -%endif - RESTORE_GOT - RESTORE_XMM - pop rbp - ret - -; void vp9_fast_quantize_b_sse2 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp9_fast_quantize_b_sse2) PRIVATE -sym(vp9_fast_quantize_b_sse2): - push rbp - mov rbp, rsp - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %if LIBVPX_YASM_WIN64 - push rdi - push rsi - %else - ; these registers are used for passing arguments - %endif -%endif - - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %if LIBVPX_YASM_WIN64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp9_block_coeff] - mov rcx, [rdi + vp9_block_round] - mov rdx, [rdi + vp9_block_quant_fast] - - ; z = coeff - movdqa xmm0, [rax] - movdqa xmm4, [rax + 16] - - ; dup z so we can save sz - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz = z >> 15 - psraw xmm0, 15 - psraw xmm4, 15 - - ; x = abs(z) = (z ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; x += round - paddw xmm1, [rcx] - paddw xmm5, [rcx + 16] - - mov rax, [rsi + vp9_blockd_qcoeff] - mov rcx, [rsi + vp9_blockd_dequant] - mov rdi, [rsi + vp9_blockd_dqcoeff] - - ; y = x * quant >> 16 - pmulhw xmm1, [rdx] - pmulhw xmm5, [rdx + 16] - - ; x = (y ^ sz) - sz - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - ; qcoeff = x - movdqa [rax], xmm1 - movdqa [rax + 16], xmm5 - - ; x * dequant - movdqa xmm2, xmm1 - movdqa xmm3, xmm5 - pmullw xmm2, [rcx] - pmullw xmm3, [rcx + 16] - - ; dqcoeff = x * dequant - movdqa [rdi], xmm2 - movdqa [rdi + 16], xmm3 - - pxor xmm4, xmm4 ;clear all bits - pcmpeqw xmm1, xmm4 - pcmpeqw xmm5, xmm4 - - pcmpeqw xmm4, xmm4 ;set all bits - pxor xmm1, xmm4 - pxor xmm5, xmm4 - - pand xmm1, [GLOBAL(inv_zig_zag)] - pand xmm5, [GLOBAL(inv_zig_zag + 16)] - - pmaxsw xmm1, xmm5 - - ; now down to 8 - pshufd xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; only 4 left - pshuflw xmm5, xmm1, 00001110b - - pmaxsw xmm1, xmm5 - - ; okay, just 2! - pshuflw xmm5, xmm1, 00000001b - - pmaxsw xmm1, xmm5 - - movd eax, xmm1 - and eax, 0xff - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %if LIBVPX_YASM_WIN64 - pop rsi - pop rdi - %endif -%endif - - RESTORE_GOT - pop rbp - ret - -SECTION_RODATA -align 16 -inv_zig_zag: - dw 0x0001, 0x0002, 0x0006, 0x0007 - dw 0x0003, 0x0005, 0x0008, 0x000d - dw 0x0004, 0x0009, 0x000c, 0x000e - dw 0x000a, 0x000b, 0x000f, 0x0010 diff --git a/vp9/encoder/x86/vp9_quantize_sse4.asm b/vp9/encoder/x86/vp9_quantize_sse4.asm deleted file mode 100644 index 4c14e5ffe..000000000 --- a/vp9/encoder/x86/vp9_quantize_sse4.asm +++ /dev/null @@ -1,253 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -; void vp9_regular_quantize_b_sse4 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 - -global sym(vp9_regular_quantize_b_sse4) PRIVATE -sym(vp9_regular_quantize_b_sse4): - -%if ABI_IS_32BIT - push rbp - mov rbp, rsp - GET_GOT rbx - push rdi - push rsi - - ALIGN_STACK 16, rax - %define qcoeff 0 ; 32 - %define stack_size 32 - sub rsp, stack_size -%else - %if LIBVPX_YASM_WIN64 - SAVE_XMM 8, u - push rdi - push rsi - %endif -%endif - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %if LIBVPX_YASM_WIN64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp9_block_coeff] - mov rcx, [rdi + vp9_block_zbin] - mov rdx, [rdi + vp9_block_round] - movd xmm7, [rdi + vp9_block_zbin_extra] - - ; z - movdqa xmm0, [rax] - movdqa xmm1, [rax + 16] - - ; duplicate zbin_oq_value - pshuflw xmm7, xmm7, 0 - punpcklwd xmm7, xmm7 - - movdqa xmm2, xmm0 - movdqa xmm3, xmm1 - - ; sz - psraw xmm0, 15 - psraw xmm1, 15 - - ; (z ^ sz) - pxor xmm2, xmm0 - pxor xmm3, xmm1 - - ; x = abs(z) - psubw xmm2, xmm0 - psubw xmm3, xmm1 - - ; zbin - movdqa xmm4, [rcx] - movdqa xmm5, [rcx + 16] - - ; *zbin_ptr + zbin_oq_value - paddw xmm4, xmm7 - paddw xmm5, xmm7 - - movdqa xmm6, xmm2 - movdqa xmm7, xmm3 - - ; x - (*zbin_ptr + zbin_oq_value) - psubw xmm6, xmm4 - psubw xmm7, xmm5 - - ; round - movdqa xmm4, [rdx] - movdqa xmm5, [rdx + 16] - - mov rax, [rdi + vp9_block_quant_shift] - mov rcx, [rdi + vp9_block_quant] - mov rdx, [rdi + vp9_block_zrun_zbin_boost] - - ; x + round - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - ; quant - movdqa xmm4, [rcx] - movdqa xmm5, [rcx + 16] - - ; y = x * quant_ptr >> 16 - pmulhw xmm4, xmm2 - pmulhw xmm5, xmm3 - - ; y += x - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - pxor xmm4, xmm4 -%if ABI_IS_32BIT - movdqa [rsp + qcoeff], xmm4 - movdqa [rsp + qcoeff + 16], xmm4 -%else - pxor xmm8, xmm8 -%endif - - ; quant_shift - movdqa xmm5, [rax] - - ; zrun_zbin_boost - mov rax, rdx - -%macro ZIGZAG_LOOP 5 - ; x - pextrw ecx, %4, %2 - - ; if (x >= zbin) - sub cx, WORD PTR[rdx] ; x - zbin - lea rdx, [rdx + 2] ; zbin_boost_ptr++ - jl .rq_zigzag_loop_%1 ; x < zbin - - pextrw edi, %3, %2 ; y - - ; downshift by quant_shift[rc] - pextrb ecx, xmm5, %1 ; quant_shift[rc] - sar edi, cl ; also sets Z bit - je .rq_zigzag_loop_%1 ; !y -%if ABI_IS_32BIT - mov WORD PTR[rsp + qcoeff + %1 *2], di -%else - pinsrw %5, edi, %2 ; qcoeff[rc] -%endif - mov rdx, rax ; reset to b->zrun_zbin_boost -.rq_zigzag_loop_%1: -%endmacro -; in vp9_default_zig_zag1d order: see vp9/common/vp9_entropy.c -ZIGZAG_LOOP 0, 0, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 1, 1, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 4, 4, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 8, 0, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 5, 5, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 2, 2, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 3, 3, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 6, 6, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 9, 1, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 12, 4, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 13, 5, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 10, 2, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 7, 7, xmm2, xmm6, xmm4 -ZIGZAG_LOOP 11, 3, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 14, 6, xmm3, xmm7, xmm8 -ZIGZAG_LOOP 15, 7, xmm3, xmm7, xmm8 - - mov rcx, [rsi + vp9_blockd_dequant] - mov rdi, [rsi + vp9_blockd_dqcoeff] - -%if ABI_IS_32BIT - movdqa xmm4, [rsp + qcoeff] - movdqa xmm5, [rsp + qcoeff + 16] -%else - %define xmm5 xmm8 -%endif - - ; y ^ sz - pxor xmm4, xmm0 - pxor xmm5, xmm1 - ; x = (y ^ sz) - sz - psubw xmm4, xmm0 - psubw xmm5, xmm1 - - ; dequant - movdqa xmm0, [rcx] - movdqa xmm1, [rcx + 16] - - mov rcx, [rsi + vp9_blockd_qcoeff] - - pmullw xmm0, xmm4 - pmullw xmm1, xmm5 - - ; store qcoeff - movdqa [rcx], xmm4 - movdqa [rcx + 16], xmm5 - - ; store dqcoeff - movdqa [rdi], xmm0 - movdqa [rdi + 16], xmm1 - - ; select the last value (in zig_zag order) for EOB - pxor xmm6, xmm6 - pcmpeqw xmm4, xmm6 - pcmpeqw xmm5, xmm6 - - packsswb xmm4, xmm5 - pshufb xmm4, [GLOBAL(zig_zag1d)] - pmovmskb edx, xmm4 - xor rdi, rdi - mov eax, -1 - xor dx, ax - bsr eax, edx - sub edi, edx - sar edi, 31 - add eax, 1 - and eax, edi - - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog -%if ABI_IS_32BIT - add rsp, stack_size - pop rsp - - pop rsi - pop rdi - RESTORE_GOT - pop rbp -%else - %undef xmm5 - %if LIBVPX_YASM_WIN64 - pop rsi - pop rdi - RESTORE_XMM - %endif -%endif - - ret - -SECTION_RODATA -align 16 -; vp9/common/vp9_entropy.c: vp9_default_zig_zag1d -zig_zag1d: - db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm deleted file mode 100644 index 1fa052147..000000000 --- a/vp9/encoder/x86/vp9_quantize_ssse3.asm +++ /dev/null @@ -1,137 +0,0 @@ -; -; Copyright (c) 2010 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - -%include "vpx_ports/x86_abi_support.asm" - - -; void vp9_fast_quantize_b_ssse3 | arg -; (BLOCK *b, | 0 -; BLOCKD *d) | 1 -; - -global sym(vp9_fast_quantize_b_ssse3) PRIVATE -sym(vp9_fast_quantize_b_ssse3): - push rbp - mov rbp, rsp - GET_GOT rbx - -%if ABI_IS_32BIT - push rdi - push rsi -%else - %if LIBVPX_YASM_WIN64 - push rdi - push rsi - %endif -%endif - ; end prolog - -%if ABI_IS_32BIT - mov rdi, arg(0) ; BLOCK *b - mov rsi, arg(1) ; BLOCKD *d -%else - %if LIBVPX_YASM_WIN64 - mov rdi, rcx ; BLOCK *b - mov rsi, rdx ; BLOCKD *d - %else - ;mov rdi, rdi ; BLOCK *b - ;mov rsi, rsi ; BLOCKD *d - %endif -%endif - - mov rax, [rdi + vp9_block_coeff] - mov rcx, [rdi + vp9_block_round] - mov rdx, [rdi + vp9_block_quant_fast] - - ; coeff - movdqa xmm0, [rax] - movdqa xmm4, [rax + 16] - - ; round - movdqa xmm2, [rcx] - movdqa xmm3, [rcx + 16] - - movdqa xmm1, xmm0 - movdqa xmm5, xmm4 - - ; sz = z >> 15 - psraw xmm0, 15 - psraw xmm4, 15 - - pabsw xmm1, xmm1 - pabsw xmm5, xmm5 - - paddw xmm1, xmm2 - paddw xmm5, xmm3 - - ; quant_fast - pmulhw xmm1, [rdx] - pmulhw xmm5, [rdx + 16] - - mov rax, [rsi + vp9_blockd_qcoeff] - mov rdi, [rsi + vp9_blockd_dequant] - mov rcx, [rsi + vp9_blockd_dqcoeff] - - pxor xmm1, xmm0 - pxor xmm5, xmm4 - psubw xmm1, xmm0 - psubw xmm5, xmm4 - - movdqa [rax], xmm1 - movdqa [rax + 16], xmm5 - - movdqa xmm2, [rdi] - movdqa xmm3, [rdi + 16] - - pxor xmm4, xmm4 - pmullw xmm2, xmm1 - pmullw xmm3, xmm5 - - pcmpeqw xmm1, xmm4 ;non zero mask - pcmpeqw xmm5, xmm4 ;non zero mask - packsswb xmm1, xmm5 - pshufb xmm1, [GLOBAL(zz_shuf)] - - pmovmskb edx, xmm1 - - xor rdi, rdi - mov eax, -1 - xor dx, ax ;flip the bits for bsr - bsr eax, edx - - movdqa [rcx], xmm2 ;store dqcoeff - movdqa [rcx + 16], xmm3 ;store dqcoeff - - sub edi, edx ;check for all zeros in bit mask - sar edi, 31 ;0 or -1 - add eax, 1 - and eax, edi ;if the bit mask was all zero, - ;then eob = 0 - mov [rsi + vp9_blockd_eob], eax - - ; begin epilog -%if ABI_IS_32BIT - pop rsi - pop rdi -%else - %if LIBVPX_YASM_WIN64 - pop rsi - pop rdi - %endif -%endif - - RESTORE_GOT - pop rbp - ret - -SECTION_RODATA -align 16 -zz_shuf: - db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --git a/vp9/encoder/x86/vp9_quantize_x86.h b/vp9/encoder/x86/vp9_quantize_x86.h deleted file mode 100644 index d1db17394..000000000 --- a/vp9/encoder/x86/vp9_quantize_x86.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - -#ifndef VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_ -#define VP9_ENCODER_X86_VP9_QUANTIZE_X86_H_ - - -/* Note: - * - * This platform is commonly built for runtime CPU detection. If you modify - * any of the function mappings present in this file, be sure to also update - * them in the function pointer initialization code - */ -#if HAVE_MMX - -#endif /* HAVE_MMX */ - - -#if HAVE_SSE2 -extern prototype_quantize_block(vp9_regular_quantize_b_sse2); -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_quantize_quantb -#define vp9_quantize_quantb vp9_regular_quantize_b_sse2 -#endif /* !CONFIG_RUNTIME_CPU_DETECT */ - -#endif /* HAVE_SSE2 */ - - -#if HAVE_SSE4_1 -extern prototype_quantize_block(vp9_regular_quantize_b_sse4); - -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp9_quantize_quantb -#define vp9_quantize_quantb vp9_regular_quantize_b_sse4 - -#endif /* !CONFIG_RUNTIME_CPU_DETECT */ - -#endif /* HAVE_SSE4_1 */ - -#endif /* QUANTIZE_X86_H */ |