diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_bitstream.c | 32 | ||||
-rw-r--r-- | vp9/encoder/vp9_block.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_dct.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 49 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 47 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemv.c | 52 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemv.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 11 | ||||
-rw-r--r-- | vp9/encoder/vp9_mbgraph.c | 35 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 492 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 31 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 26 | ||||
-rw-r--r-- | vp9/encoder/vp9_quantize.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_quantize.h | 10 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 36 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 327 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.h | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_segmentation.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_tokenize.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance_c.c | 147 |
22 files changed, 717 insertions, 618 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 622f75fe6..20dd8e175 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -484,17 +484,13 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) { } if (bsize < BLOCK_8X8) { - int j; - MB_PREDICTION_MODE blockmode; - int_mv blockmv; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - j = idy * 2 + idx; - blockmode = x->partition_info->bmi[j].mode; - blockmv = m->bmi[j].as_mv[0]; + const int j = idy * 2 + idx; + const MB_PREDICTION_MODE blockmode = x->partition_info->bmi[j].mode; write_sb_mv_ref(bc, blockmode, mv_ref_p); ++cm->counts.inter_mode[mi->mode_context[rf]] [inter_mode_offset(blockmode)]; @@ -503,14 +499,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) { #ifdef ENTROPY_STATS active_section = 11; #endif - vp9_encode_mv(cpi, bc, &blockmv.as_mv, &mi->best_mv.as_mv, - nmvc, allow_hp); - - if (mi->ref_frame[1] > INTRA_FRAME) - vp9_encode_mv(cpi, bc, - &m->bmi[j].as_mv[1].as_mv, - &mi->best_second_mv.as_mv, - nmvc, allow_hp); + vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[0].as_mv, + &mi->best_mv[0].as_mv, nmvc, allow_hp); + + if (has_second_ref(mi)) + vp9_encode_mv(cpi, bc, &m->bmi[j].as_mv[1].as_mv, + &mi->best_mv[1].as_mv, nmvc, allow_hp); } } } @@ -518,12 +512,12 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc) { #ifdef ENTROPY_STATS active_section = 5; #endif - vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, &mi->best_mv.as_mv, - nmvc, allow_hp); + vp9_encode_mv(cpi, bc, &mi->mv[0].as_mv, + &mi->best_mv[0].as_mv, nmvc, allow_hp); - if (mi->ref_frame[1] > INTRA_FRAME) - vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, &mi->best_second_mv.as_mv, - nmvc, allow_hp); + if (has_second_ref(mi)) + 
vp9_encode_mv(cpi, bc, &mi->mv[1].as_mv, + &mi->best_mv[1].as_mv, nmvc, allow_hp); } } } diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 013047e35..5a0d746c8 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -34,6 +34,7 @@ typedef struct { typedef struct { MODE_INFO mic; PARTITION_INFO partition_info; + unsigned char zcoeff_blk[256]; int skip; int_mv best_ref_mv; int_mv second_best_ref_mv; @@ -136,6 +137,7 @@ struct macroblock { int mv_row_min; int mv_row_max; + unsigned char zcoeff_blk[TX_SIZES][256]; int skip; int encode_breakout; diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index ca863931e..b9c300033 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -637,10 +637,10 @@ void vp9_short_walsh4x4_c(short *input, short *output, int pitch) { c1 = e1 - c1; a1 -= c1; d1 += b1; - op[0] = a1 << WHT_UPSCALE_FACTOR; - op[1] = c1 << WHT_UPSCALE_FACTOR; - op[2] = d1 << WHT_UPSCALE_FACTOR; - op[3] = b1 << WHT_UPSCALE_FACTOR; + op[0] = a1 * UNIT_QUANT_FACTOR; + op[1] = c1 * UNIT_QUANT_FACTOR; + op[2] = d1 * UNIT_QUANT_FACTOR; + op[3] = b1 * UNIT_QUANT_FACTOR; ip += 4; op += 4; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index ee938bda9..f6045e80b 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -390,6 +390,9 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } x->skip = ctx->skip; + vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, + sizeof(ctx->zcoeff_blk)); + if (!output_enabled) return; @@ -428,19 +431,19 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, cpi->mode_chosen_counts[mb_mode_index]++; if (is_inter_block(mbmi) && (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) { - int_mv best_mv, best_second_mv; + int_mv best_mv[2]; const MV_REFERENCE_FRAME rf1 = mbmi->ref_frame[0]; const MV_REFERENCE_FRAME rf2 = mbmi->ref_frame[1]; - best_mv.as_int = ctx->best_ref_mv.as_int; - best_second_mv.as_int = 
ctx->second_best_ref_mv.as_int; + best_mv[0].as_int = ctx->best_ref_mv.as_int; + best_mv[1].as_int = ctx->second_best_ref_mv.as_int; if (mbmi->mode == NEWMV) { - best_mv.as_int = mbmi->ref_mvs[rf1][0].as_int; + best_mv[0].as_int = mbmi->ref_mvs[rf1][0].as_int; if (rf2 > 0) - best_second_mv.as_int = mbmi->ref_mvs[rf2][0].as_int; + best_mv[1].as_int = mbmi->ref_mvs[rf2][0].as_int; } - mbmi->best_mv.as_int = best_mv.as_int; - mbmi->best_second_mv.as_int = best_second_mv.as_int; - vp9_update_nmv_count(cpi, x, &best_mv, &best_second_mv); + mbmi->best_mv[0].as_int = best_mv[0].as_int; + mbmi->best_mv[1].as_int = best_mv[1].as_int; + vp9_update_mv_count(cpi, x, best_mv); } if (cm->mcomp_filter_type == SWITCHABLE && is_inter_mode(mbmi->mode)) { @@ -2209,7 +2212,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->inter_zz_count = 0; vp9_zero(cm->counts.switchable_interp); - vp9_zero(cpi->txfm_stepdown_count); + vp9_zero(cpi->tx_stepdown_count); xd->mi_8x8 = cm->mi_grid_visible; // required for vp9_frame_init_quantizer @@ -2348,18 +2351,19 @@ static void reset_skip_txfm_size_b(VP9_COMP *cpi, MODE_INFO **mi_8x8, int mis, TX_SIZE max_tx_size, int bw, int bh, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON * const cm = &cpi->common; - MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi; - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) { return; - - if (mbmi->tx_size > max_tx_size) { - const int ymbs = MIN(bh, cm->mi_rows - mi_row); - const int xmbs = MIN(bw, cm->mi_cols - mi_col); - - assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) || - get_skip_flag(mi_8x8, mis, ymbs, xmbs)); - set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size); + } else { + MB_MODE_INFO * const mbmi = &mi_8x8[0]->mbmi; + if (mbmi->tx_size > max_tx_size) { + const int ymbs = MIN(bh, cm->mi_rows - mi_row); + const int xmbs = MIN(bw, cm->mi_cols - mi_col); + + assert(vp9_segfeature_active(&cm->seg, mbmi->segment_id, 
SEG_LVL_SKIP) || + get_skip_flag(mi_8x8, mis, ymbs, xmbs)); + set_txfm_flag(mi_8x8, mis, ymbs, xmbs, max_tx_size); + } } } @@ -2454,9 +2458,9 @@ static void select_tx_mode(VP9_COMP *cpi) { unsigned int total = 0; int i; for (i = 0; i < TX_SIZES; ++i) - total += cpi->txfm_stepdown_count[i]; + total += cpi->tx_stepdown_count[i]; if (total) { - double fraction = (double)cpi->txfm_stepdown_count[0] / total; + double fraction = (double)cpi->tx_stepdown_count[0] / total; cpi->common.tx_mode = fraction > 0.90 ? ALLOW_32X32 : TX_MODE_SELECT; // printf("fraction = %f\n", fraction); } // else keep unchanged @@ -2732,7 +2736,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[0])]; YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx]; YV12_BUFFER_CONFIG *second_ref_fb = NULL; - if (mbmi->ref_frame[1] > 0) { + if (has_second_ref(mbmi)) { idx = cm->ref_frame_map[get_ref_frame_idx(cpi, mbmi->ref_frame[1])]; second_ref_fb = &cm->yv12_fb[idx]; } @@ -2744,7 +2748,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col, &xd->scale_factor[1]); - vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 2c12477a7..76a5d33e7 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -53,7 +53,7 @@ static void inverse_transform_b_8x8_add(int eob, if (eob <= 1) vp9_short_idct8x8_1_add(dqcoeff, dest, stride); else if (eob <= 10) - vp9_short_idct10_8x8_add(dqcoeff, dest, stride); + vp9_short_idct8x8_10_add(dqcoeff, dest, stride); else vp9_short_idct8x8_add(dqcoeff, dest, stride); } @@ -64,7 +64,7 @@ static void inverse_transform_b_16x16_add(int eob, if (eob <= 1) vp9_short_idct16x16_1_add(dqcoeff, dest, stride); else if (eob <= 10) - vp9_short_idct10_16x16_add(dqcoeff, dest, stride); + 
vp9_short_idct16x16_10_add(dqcoeff, dest, stride); else vp9_short_idct16x16_add(dqcoeff, dest, stride); } @@ -172,7 +172,7 @@ static void optimize_b(MACROBLOCK *mb, assert((!type && !plane) || (type && plane)); dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block); qcoeff_ptr = BLOCK_OFFSET(pd->qcoeff, block); - get_scan_and_band(xd, tx_size, type, ib, &scan, &band_translate); + get_scan_and_band(xd, tx_size, type, ib, &scan, &nb, &band_translate); assert(eob <= default_eob); /* Now set up a Viterbi trellis to evaluate alternative roundings. */ @@ -191,7 +191,6 @@ static void optimize_b(MACROBLOCK *mb, for (i = 0; i < eob; i++) token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[ qcoeff_ptr[scan[i]]].token]; - nb = vp9_get_coef_neighbors_handle(scan); for (i = eob; i-- > i0;) { int base_bits, d2, dx; @@ -365,36 +364,10 @@ static void optimize_init_b(int plane, BLOCK_SIZE bsize, const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; const TX_SIZE tx_size = plane ? 
get_uv_tx_size(mbmi) : mbmi->tx_size; - int i; - switch (tx_size) { - case TX_4X4: - vpx_memcpy(args->ctx->ta[plane], pd->above_context, - sizeof(ENTROPY_CONTEXT) * num_4x4_w); - vpx_memcpy(args->ctx->tl[plane], pd->left_context, - sizeof(ENTROPY_CONTEXT) * num_4x4_h); - break; - case TX_8X8: - for (i = 0; i < num_4x4_w; i += 2) - args->ctx->ta[plane][i] = !!*(uint16_t *)&pd->above_context[i]; - for (i = 0; i < num_4x4_h; i += 2) - args->ctx->tl[plane][i] = !!*(uint16_t *)&pd->left_context[i]; - break; - case TX_16X16: - for (i = 0; i < num_4x4_w; i += 4) - args->ctx->ta[plane][i] = !!*(uint32_t *)&pd->above_context[i]; - for (i = 0; i < num_4x4_h; i += 4) - args->ctx->tl[plane][i] = !!*(uint32_t *)&pd->left_context[i]; - break; - case TX_32X32: - for (i = 0; i < num_4x4_w; i += 8) - args->ctx->ta[plane][i] = !!*(uint64_t *)&pd->above_context[i]; - for (i = 0; i < num_4x4_h; i += 8) - args->ctx->tl[plane][i] = !!*(uint64_t *)&pd->left_context[i]; - break; - default: - assert(0); - } + vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane], + pd->above_context, pd->left_context, + num_4x4_w, num_4x4_h); } void vp9_xform_quant(int plane, int block, BLOCK_SIZE plane_bsize, @@ -482,6 +455,14 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block, pd->dst.buf, pd->dst.stride); + + // TODO(jingning): per transformed block zero forcing only enabled for + // luma component. will integrate chroma components as well. 
+ if (x->zcoeff_blk[tx_size][block] && plane == 0) { + pd->eobs[block] = 0; + return; + } + vp9_xform_quant(plane, block, plane_bsize, tx_size, arg); if (x->optimize) diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index ed3a2bb64..db08ee856 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -314,44 +314,34 @@ void vp9_build_nmv_cost_table(int *mvjoint, build_nmv_component_cost_table(mvcost[1], &mvctx->comps[1], usehp); } -void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x, - int_mv *best_ref_mv, int_mv *second_best_ref_mv) { +static void inc_mvs(int_mv mv[2], int_mv ref[2], int is_compound, + nmv_context_counts *counts) { + int i; + for (i = 0; i < 1 + is_compound; ++i) { + const MV diff = { mv[i].as_mv.row - ref[i].as_mv.row, + mv[i].as_mv.col - ref[i].as_mv.col }; + vp9_inc_mv(&diff, counts); + } +} + +void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]) { MODE_INFO *mi = x->e_mbd.mi_8x8[0]; MB_MODE_INFO *const mbmi = &mi->mbmi; - MV diff; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; - int idx, idy; + const int is_compound = has_second_ref(mbmi); if (mbmi->sb_type < BLOCK_8X8) { - PARTITION_INFO *pi = x->partition_info; - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { + const int num_4x4_w = num_4x4_blocks_wide_lookup[mbmi->sb_type]; + const int num_4x4_h = num_4x4_blocks_high_lookup[mbmi->sb_type]; + int idx, idy; + + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { const int i = idy * 2 + idx; - if (pi->bmi[i].mode == NEWMV) { - diff.row = mi->bmi[i].as_mv[0].as_mv.row - best_ref_mv->as_mv.row; - diff.col = mi->bmi[i].as_mv[0].as_mv.col - best_ref_mv->as_mv.col; - vp9_inc_mv(&diff, &cpi->NMVcount); - - if (mi->mbmi.ref_frame[1] > INTRA_FRAME) { - diff.row = 
mi->bmi[i].as_mv[1].as_mv.row - - second_best_ref_mv->as_mv.row; - diff.col = mi->bmi[i].as_mv[1].as_mv.col - - second_best_ref_mv->as_mv.col; - vp9_inc_mv(&diff, &cpi->NMVcount); - } - } + if (x->partition_info->bmi[i].mode == NEWMV) + inc_mvs(mi->bmi[i].as_mv, best_ref_mv, is_compound, &cpi->NMVcount); } } } else if (mbmi->mode == NEWMV) { - diff.row = mbmi->mv[0].as_mv.row - best_ref_mv->as_mv.row; - diff.col = mbmi->mv[0].as_mv.col - best_ref_mv->as_mv.col; - vp9_inc_mv(&diff, &cpi->NMVcount); - - if (mbmi->ref_frame[1] > INTRA_FRAME) { - diff.row = mbmi->mv[1].as_mv.row - second_best_ref_mv->as_mv.row; - diff.col = mbmi->mv[1].as_mv.col - second_best_ref_mv->as_mv.col; - vp9_inc_mv(&diff, &cpi->NMVcount); - } + inc_mvs(mbmi->mv, best_ref_mv, is_compound, &cpi->NMVcount); } } diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h index 2789ce114..633177885 100644 --- a/vp9/encoder/vp9_encodemv.h +++ b/vp9/encoder/vp9_encodemv.h @@ -25,7 +25,7 @@ void vp9_build_nmv_cost_table(int *mvjoint, int usehp, int mvc_flag_v, int mvc_flag_h); -void vp9_update_nmv_count(VP9_COMP *cpi, MACROBLOCK *x, - int_mv *best_ref_mv, int_mv *second_best_ref_mv); + +void vp9_update_mv_count(VP9_COMP *cpi, MACROBLOCK *x, int_mv best_ref_mv[2]); #endif // VP9_ENCODER_VP9_ENCODEMV_H_ diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 6e44e604c..eaa3bd183 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -534,10 +534,11 @@ void vp9_first_pass(VP9_COMP *cpi) { recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * 8); - // Set up limit values for motion vectors to prevent them extending outside the UMV borders - x->mv_row_min = -((mb_row * 16) + (VP9BORDERINPIXELS - 8)); + // Set up limit values for motion vectors to prevent them extending + // outside the UMV borders + x->mv_row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16); x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) - + 
(VP9BORDERINPIXELS - 8); + + BORDER_MV_PIXELS_B16; // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { @@ -583,9 +584,9 @@ void vp9_first_pass(VP9_COMP *cpi) { intra_error += (int64_t)this_error; // Set up limit values for motion vectors to prevent them extending outside the UMV borders - x->mv_col_min = -((mb_col * 16) + (VP9BORDERINPIXELS - 8)); + x->mv_col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16); x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) - + (VP9BORDERINPIXELS - 8); + + BORDER_MV_PIXELS_B16; // Other than for the first frame do a motion search if (cm->current_video_frame > 0) { diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 5a671f201..0a6576eb5 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -10,14 +10,17 @@ #include <limits.h> -#include <vpx_mem/vpx_mem.h> -#include <vp9/encoder/vp9_encodeintra.h> -#include <vp9/encoder/vp9_rdopt.h> -#include <vp9/common/vp9_blockd.h> -#include <vp9/common/vp9_reconinter.h> -#include <vp9/common/vp9_reconintra.h> -#include <vp9/common/vp9_systemdependent.h> -#include <vp9/encoder/vp9_segmentation.h> +#include "vpx_mem/vpx_mem.h" +#include "vp9/encoder/vp9_encodeintra.h" +#include "vp9/encoder/vp9_rdopt.h" +#include "vp9/encoder/vp9_segmentation.h" +#include "vp9/encoder/vp9_mcomp.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_systemdependent.h" + + static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, int_mv *ref_mv, @@ -46,9 +49,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, ref_full.as_mv.row = ref_mv->as_mv.row >> 3; /*cpi->sf.search_method == HEX*/ - best_err = vp9_hex_search(x, &ref_full, step_param, x->errorperbit, + best_err = vp9_hex_search(x, &ref_full.as_mv, step_param, x->errorperbit, 0, &v_fn_ptr, - 0, ref_mv, dst_mv); + 0, &ref_mv->as_mv, &dst_mv->as_mv); // Try sub-pixel MC // if (bestsme > 
error_thresh && bestsme < INT_MAX) @@ -57,7 +60,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, unsigned int sse; best_err = cpi->find_fractional_mv_step( x, - dst_mv, ref_mv, + &dst_mv->as_mv, &ref_mv->as_mv, x->errorperbit, &v_fn_ptr, 0, cpi->sf.subpel_iters_per_step, NULL, NULL, & distortion, &sse); @@ -246,9 +249,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_top_mv.as_int = 0; gld_top_mv.as_int = 0; - x->mv_row_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND); - x->mv_row_max = (cm->mb_rows - 1) * 8 + VP9BORDERINPIXELS - - 8 - VP9_INTERP_EXTEND; + x->mv_row_min = -BORDER_MV_PIXELS_B16; + x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16; xd->up_available = 0; xd->plane[0].dst.stride = buf->y_stride; xd->plane[0].pre[0].stride = buf->y_stride; @@ -267,9 +269,8 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, // Set up limit values for motion vectors to prevent them extending outside the UMV borders arf_left_mv.as_int = arf_top_mv.as_int; gld_left_mv.as_int = gld_top_mv.as_int; - x->mv_col_min = -(VP9BORDERINPIXELS - 8 - VP9_INTERP_EXTEND); - x->mv_col_max = (cm->mb_cols - 1) * 8 + VP9BORDERINPIXELS - - 8 - VP9_INTERP_EXTEND; + x->mv_col_min = -BORDER_MV_PIXELS_B16; + x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16; xd->left_available = 0; for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 7dd786904..44eaa657c 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -59,38 +59,39 @@ int vp9_init_search_range(VP9_COMP *cpi, int size) { return sr; } -int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], - int weight) { - MV v; - v.row = mv->as_mv.row - ref->as_mv.row; - v.col = mv->as_mv.col - ref->as_mv.col; - return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] + - mvcost[0][v.row] + - 
mvcost[1][v.col]) * weight, 7); +static INLINE int mv_cost(const MV *mv, + const int *joint_cost, int *comp_cost[2]) { + return joint_cost[vp9_get_mv_joint(mv)] + + comp_cost[0][mv->row] + comp_cost[1][mv->col]; } -static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvjcost, int *mvcost[2], +int vp9_mv_bit_cost(const MV *mv, const MV *ref, + const int *mvjcost, int *mvcost[2], int weight) { + const MV diff = { mv->row - ref->row, + mv->col - ref->col }; + return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); +} + +static int mv_err_cost(const MV *mv, const MV *ref, + const int *mvjcost, int *mvcost[2], int error_per_bit) { if (mvcost) { - MV v; - v.row = mv->as_mv.row - ref->as_mv.row; - v.col = mv->as_mv.col - ref->as_mv.col; - return ROUND_POWER_OF_TWO((mvjcost[vp9_get_mv_joint(&v)] + - mvcost[0][v.row] + - mvcost[1][v.col]) * error_per_bit, 13); + const MV diff = { mv->row - ref->row, + mv->col - ref->col }; + return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * + error_per_bit, 13); } return 0; } -static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvjsadcost, - int *mvsadcost[2], int error_per_bit) { +static int mvsad_err_cost(const MV *mv, const MV *ref, + const int *mvjsadcost, int *mvsadcost[2], + int error_per_bit) { if (mvsadcost) { - MV v; - v.row = mv->as_mv.row - ref->as_mv.row; - v.col = mv->as_mv.col - ref->as_mv.col; - return ROUND_POWER_OF_TWO((mvjsadcost[vp9_get_mv_joint(&v)] + - mvsadcost[0][v.row] + - mvsadcost[1][v.col]) * error_per_bit, 8); + const MV diff = { mv->row - ref->row, + mv->col - ref->col }; + return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) * + error_per_bit, 8); } return 0; } @@ -273,7 +274,7 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { } int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, + MV *bestmv, const MV *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, @@ -294,25 +295,25 @@ int 
vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, int thismse; const int y_stride = xd->plane[0].pre[0].stride; - const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; + const int offset = bestmv->row * y_stride + bestmv->col; uint8_t *y = xd->plane[0].pre[0].buf + offset; - int rr = ref_mv->as_mv.row; - int rc = ref_mv->as_mv.col; - int br = bestmv->as_mv.row * 8; - int bc = bestmv->as_mv.col * 8; + int rr = ref_mv->row; + int rc = ref_mv->col; + int br = bestmv->row * 8; + int bc = bestmv->col * 8; int hstep = 4; - const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX); - const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX); - const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX); - const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX); + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); int tr = br; int tc = bc; // central mv - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; + bestmv->row <<= 3; + bestmv->col <<= 3; // calculate central point error besterr = vfp->vf(y, y_stride, z, src_stride, sse1); @@ -347,7 +348,7 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, } } - if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { hstep >>= 1; while (eighthiters--) { @@ -360,18 +361,18 @@ int vp9_find_best_sub_pixel_iterative(MACROBLOCK *x, } } - bestmv->as_mv.row = br; - bestmv->as_mv.col = bc; + bestmv->row = br; + bestmv->col = bc; - if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->row - ref_mv->row) > 
(MAX_FULL_PEL_VAL << 3))) return INT_MAX; return besterr; } int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, + MV *bestmv, const MV *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, @@ -391,25 +392,25 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, unsigned int eighthiters = iters_per_step; const int y_stride = xd->plane[0].pre[0].stride; - const int offset = bestmv->as_mv.row * y_stride + bestmv->as_mv.col; + const int offset = bestmv->row * y_stride + bestmv->col; uint8_t *y = xd->plane[0].pre[0].buf + offset; - int rr = ref_mv->as_mv.row; - int rc = ref_mv->as_mv.col; - int br = bestmv->as_mv.row * 8; - int bc = bestmv->as_mv.col * 8; + int rr = ref_mv->row; + int rc = ref_mv->col; + int br = bestmv->row * 8; + int bc = bestmv->col * 8; int hstep = 4; - const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX); - const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX); - const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX); - const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX); + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); int tr = br; int tc = bc; // central mv - bestmv->as_mv.row *= 8; - bestmv->as_mv.col *= 8; + bestmv->row *= 8; + bestmv->col *= 8; // calculate central point error besterr = vfp->vf(y, y_stride, z, src_stride, sse1); @@ -435,7 +436,7 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, tc = bc; } - if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { hstep >>= 1; FIRST_LEVEL_CHECKS; @@ -446,11 +447,11 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, tc = bc; } - bestmv->as_mv.row = br; - bestmv->as_mv.col = bc; + 
bestmv->row = br; + bestmv->col = bc; - if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) return INT_MAX; return besterr; @@ -463,7 +464,7 @@ int vp9_find_best_sub_pixel_tree(MACROBLOCK *x, z, src_stride, &sse, second_pred) int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, + MV *bestmv, const MV *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, @@ -487,25 +488,25 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); const int y_stride = xd->plane[0].pre[0].stride; - const int offset = bestmv->as_mv.row * y_stride + bestmv->as_mv.col; + const int offset = bestmv->row * y_stride + bestmv->col; uint8_t *const y = xd->plane[0].pre[0].buf + offset; - int rr = ref_mv->as_mv.row; - int rc = ref_mv->as_mv.col; - int br = bestmv->as_mv.row * 8; - int bc = bestmv->as_mv.col * 8; + int rr = ref_mv->row; + int rc = ref_mv->col; + int br = bestmv->row * 8; + int bc = bestmv->col * 8; int hstep = 4; - const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX); - const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX); - const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX); - const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX); + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); + const int maxc = MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); int tr = br; int tc = bc; // central mv - bestmv->as_mv.row *= 8; - bestmv->as_mv.col *= 8; + bestmv->row *= 8; + bestmv->col *= 8; // calculate central point error // TODO(yunqingwang): 
central pointer error was already calculated in full- @@ -543,7 +544,7 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, } } - if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { hstep >>= 1; while (eighthiters--) { @@ -555,18 +556,18 @@ int vp9_find_best_sub_pixel_comp_iterative(MACROBLOCK *x, tc = bc; } } - bestmv->as_mv.row = br; - bestmv->as_mv.col = bc; + bestmv->row = br; + bestmv->col = bc; - if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) return INT_MAX; return besterr; } int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, + MV *bestmv, const MV *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, @@ -589,25 +590,25 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); const int y_stride = xd->plane[0].pre[0].stride; - const int offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; + const int offset = bestmv->row * y_stride + bestmv->col; uint8_t *y = xd->plane[0].pre[0].buf + offset; - int rr = ref_mv->as_mv.row; - int rc = ref_mv->as_mv.col; - int br = bestmv->as_mv.row * 8; - int bc = bestmv->as_mv.col * 8; + int rr = ref_mv->row; + int rc = ref_mv->col; + int br = bestmv->row * 8; + int bc = bestmv->col * 8; int hstep = 4; - const int minc = MAX(x->mv_col_min * 8, ref_mv->as_mv.col - MV_MAX); - const int maxc = MIN(x->mv_col_max * 8, ref_mv->as_mv.col + MV_MAX); - const int minr = MAX(x->mv_row_min * 8, ref_mv->as_mv.row - MV_MAX); - const int maxr = MIN(x->mv_row_max * 8, ref_mv->as_mv.row + MV_MAX); + const int minc = MAX(x->mv_col_min * 8, ref_mv->col - MV_MAX); + const int maxc = 
MIN(x->mv_col_max * 8, ref_mv->col + MV_MAX); + const int minr = MAX(x->mv_row_min * 8, ref_mv->row - MV_MAX); + const int maxr = MIN(x->mv_row_max * 8, ref_mv->row + MV_MAX); int tr = br; int tc = bc; // central mv - bestmv->as_mv.row *= 8; - bestmv->as_mv.col *= 8; + bestmv->row *= 8; + bestmv->col *= 8; // calculate central point error // TODO(yunqingwang): central pointer error was already calculated in full- @@ -641,7 +642,7 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, tc = bc; } - if (xd->allow_high_precision_mv && vp9_use_mv_hp(&ref_mv->as_mv) && + if (xd->allow_high_precision_mv && vp9_use_mv_hp(ref_mv) && forced_stop == 0) { hstep >>= 1; FIRST_LEVEL_CHECKS; @@ -651,11 +652,11 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, tr = br; tc = bc; } - bestmv->as_mv.row = br; - bestmv->as_mv.col = bc; + bestmv->row = br; + bestmv->col = bc; - if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) + if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || + (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) return INT_MAX; return besterr; @@ -679,10 +680,10 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, #define CHECK_POINT \ {\ - if (this_mv.as_mv.col < x->mv_col_min) continue;\ - if (this_mv.as_mv.col > x->mv_col_max) continue;\ - if (this_mv.as_mv.row < x->mv_row_min) continue;\ - if (this_mv.as_mv.row > x->mv_row_max) continue;\ + if (this_mv.col < x->mv_col_min) continue;\ + if (this_mv.col > x->mv_col_max) continue;\ + if (this_mv.row < x->mv_row_min) continue;\ + if (this_mv.row > x->mv_row_max) continue;\ } #define CHECK_BETTER \ @@ -690,7 +691,7 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, if (thissad < bestsad)\ {\ if (use_mvcost) \ - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \ + thissad += mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, \ mvjsadcost, mvsadcost, \ sad_per_bit);\ if (thissad < bestsad)\ 
@@ -715,14 +716,14 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x, // candidates as indicated in the num_candidates and candidates arrays // passed into this function static int vp9_pattern_search(MACROBLOCK *x, - int_mv *ref_mv, + MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, int do_refine, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, - int_mv *center_mv, int_mv *best_mv, + const MV *center_mv, MV *best_mv, const int num_candidates[MAX_PATTERN_SCALES], const MV candidates[MAX_PATTERN_SCALES] [MAX_PATTERN_CANDIDATES]) { @@ -735,7 +736,7 @@ static int vp9_pattern_search(MACROBLOCK *x, int what_stride = x->plane[0].src.stride; int in_what_stride = xd->plane[0].pre[0].stride; int br, bc; - int_mv this_mv; + MV this_mv; int bestsad = INT_MAX; int thissad; uint8_t *base_offset; @@ -748,24 +749,22 @@ static int vp9_pattern_search(MACROBLOCK *x, int *mvjsadcost = x->nmvjointsadcost; int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]}; - fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; - fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; + fcenter_mv.as_mv.row = center_mv->row >> 3; + fcenter_mv.as_mv.col = center_mv->col >> 3; // adjust ref_mv to make sure it is within MV range - clamp_mv(&ref_mv->as_mv, - x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); - br = ref_mv->as_mv.row; - bc = ref_mv->as_mv.col; + clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); + br = ref_mv->row; + bc = ref_mv->col; // Work out the start point for the search base_offset = (uint8_t *)(xd->plane[0].pre[0].buf); this_offset = base_offset + (br * in_what_stride) + bc; - this_mv.as_mv.row = br; - this_mv.as_mv.col = bc; - bestsad = vfp->sdf(what, what_stride, this_offset, - in_what_stride, 0x7fffffff) - + mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); + this_mv.row = br; + this_mv.col = bc; + bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride, 0x7fffffff) + + 
mvsad_err_cost(&this_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of @@ -778,21 +777,21 @@ static int vp9_pattern_search(MACROBLOCK *x, CHECK_BOUNDS((1 << t)) if (all_in) { for (i = 0; i < num_candidates[t]; i++) { - this_mv.as_mv.row = br + candidates[t][i].row; - this_mv.as_mv.col = bc + candidates[t][i].col; - this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + - this_mv.as_mv.col; + this_mv.row = br + candidates[t][i].row; + this_mv.col = bc + candidates[t][i].col; + this_offset = base_offset + (this_mv.row * in_what_stride) + + this_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[t]; i++) { - this_mv.as_mv.row = br + candidates[t][i].row; - this_mv.as_mv.col = bc + candidates[t][i].col; + this_mv.row = br + candidates[t][i].row; + this_mv.col = bc + candidates[t][i].col; CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + - this_mv.as_mv.col; + this_offset = base_offset + (this_mv.row * in_what_stride) + + this_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER @@ -822,21 +821,21 @@ static int vp9_pattern_search(MACROBLOCK *x, CHECK_BOUNDS((1 << s)) if (all_in) { for (i = 0; i < num_candidates[s]; i++) { - this_mv.as_mv.row = br + candidates[s][i].row; - this_mv.as_mv.col = bc + candidates[s][i].col; - this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + - this_mv.as_mv.col; + this_mv.row = br + candidates[s][i].row; + this_mv.col = bc + candidates[s][i].col; + this_offset = base_offset + (this_mv.row * in_what_stride) + + this_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < num_candidates[s]; i++) { - this_mv.as_mv.row = br + 
candidates[s][i].row; - this_mv.as_mv.col = bc + candidates[s][i].col; + this_mv.row = br + candidates[s][i].row; + this_mv.col = bc + candidates[s][i].col; CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + - this_mv.as_mv.col; + this_offset = base_offset + (this_mv.row * in_what_stride) + + this_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER @@ -860,25 +859,21 @@ static int vp9_pattern_search(MACROBLOCK *x, get_next_chkpts(next_chkpts_indices, k, num_candidates[s]); if (all_in) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - this_mv.as_mv.row = br + - candidates[s][next_chkpts_indices[i]].row; - this_mv.as_mv.col = bc + - candidates[s][next_chkpts_indices[i]].col; - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + - this_mv.as_mv.col; + this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; + this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; + this_offset = base_offset + (this_mv.row * (in_what_stride)) + + this_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - this_mv.as_mv.row = br + - candidates[s][next_chkpts_indices[i]].row; - this_mv.as_mv.col = bc + - candidates[s][next_chkpts_indices[i]].col; + this_mv.row = br + candidates[s][next_chkpts_indices[i]].row; + this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col; CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + - this_mv.as_mv.col; + this_offset = base_offset + (this_mv.row * (in_what_stride)) + + this_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER @@ -905,21 +900,21 @@ static int vp9_pattern_search(MACROBLOCK *x, CHECK_BOUNDS(1) if (all_in) { for (i = 0; i < 4; i++) { - this_mv.as_mv.row = br + neighbors[i].row; - this_mv.as_mv.col = bc + neighbors[i].col; - this_offset = base_offset + 
(this_mv.as_mv.row * (in_what_stride)) + - this_mv.as_mv.col; + this_mv.row = br + neighbors[i].row; + this_mv.col = bc + neighbors[i].col; + this_offset = base_offset + (this_mv.row * (in_what_stride)) + + this_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER } } else { for (i = 0; i < 4; i++) { - this_mv.as_mv.row = br + neighbors[i].row; - this_mv.as_mv.col = bc + neighbors[i].col; + this_mv.row = br + neighbors[i].row; + this_mv.col = bc + neighbors[i].col; CHECK_POINT - this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + - this_mv.as_mv.col; + this_offset = base_offset + (this_mv.row * (in_what_stride)) + + this_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride, bestsad); CHECK_BETTER @@ -935,31 +930,32 @@ static int vp9_pattern_search(MACROBLOCK *x, } } - best_mv->as_mv.row = br; - best_mv->as_mv.col = bc; + best_mv->row = br; + best_mv->col = bc; - this_offset = base_offset + (best_mv->as_mv.row * (in_what_stride)) + - best_mv->as_mv.col; - this_mv.as_mv.row = best_mv->as_mv.row * 8; - this_mv.as_mv.col = best_mv->as_mv.col * 8; + this_offset = base_offset + (best_mv->row * in_what_stride) + + best_mv->col; + this_mv.row = best_mv->row * 8; + this_mv.col = best_mv->col * 8; if (bestsad == INT_MAX) return INT_MAX; - return - vfp->vf(what, what_stride, this_offset, in_what_stride, - (unsigned int *)(&bestsad)) + - use_mvcost ? mv_err_cost(&this_mv, center_mv, x->nmvjointcost, x->mvcost, - x->errorperbit) : 0; + + return vfp->vf(what, what_stride, this_offset, in_what_stride, + (unsigned int *)&bestsad) + + use_mvcost ? 
mv_err_cost(&this_mv, center_mv, + x->nmvjointcost, x->mvcost, x->errorperbit) + : 0; } int vp9_hex_search(MACROBLOCK *x, - int_mv *ref_mv, + MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, - int_mv *center_mv, int_mv *best_mv) { + const MV *center_mv, MV *best_mv) { // First scale has 8-closest points, the rest have 6 points in hex shape // at increasing scales static const int hex_num_candidates[MAX_PATTERN_SCALES] = { @@ -988,14 +984,14 @@ int vp9_hex_search(MACROBLOCK *x, } int vp9_bigdia_search(MACROBLOCK *x, - int_mv *ref_mv, + MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, - int_mv *center_mv, - int_mv *best_mv) { + const MV *center_mv, + MV *best_mv) { // First scale has 4-closest points, the rest have 8 points in diamond // shape at increasing scales static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { @@ -1022,22 +1018,21 @@ int vp9_bigdia_search(MACROBLOCK *x, {{-512, -512}, {0, -1024}, {512, -512}, {1024, 0}, {512, 512}, {0, 1024}, {-512, 512}, {-1024, 0}}, }; - return - vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, - do_init_search, 0, vfp, use_mvcost, - center_mv, best_mv, - bigdia_num_candidates, bigdia_candidates); + return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, + do_init_search, 0, vfp, use_mvcost, + center_mv, best_mv, + bigdia_num_candidates, bigdia_candidates); } int vp9_square_search(MACROBLOCK *x, - int_mv *ref_mv, + MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, - int_mv *center_mv, - int_mv *best_mv) { + const MV *center_mv, + MV *best_mv) { // All scales have 8 closest points in square shape static const int square_num_candidates[MAX_PATTERN_SCALES] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, @@ -1064,11 +1059,10 @@ int vp9_square_search(MACROBLOCK *x, {{-1024, -1024}, {0, -1024}, {1024, -1024}, {1024, 
0}, {1024, 1024}, {0, 1024}, {-1024, 1024}, {-1024, 0}}, }; - return - vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, - do_init_search, 0, vfp, use_mvcost, - center_mv, best_mv, - square_num_candidates, square_candidates); + return vp9_pattern_search(x, ref_mv, search_param, sad_per_bit, + do_init_search, 0, vfp, use_mvcost, + center_mv, best_mv, + square_num_candidates, square_candidates); }; #undef CHECK_BOUNDS @@ -1124,10 +1118,9 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, best_address = in_what; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, - in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); // search_param determines the length of the initial step and hence the number of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 
@@ -1153,7 +1146,7 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1185,7 +1178,7 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1210,8 +1203,9 @@ int vp9_diamond_search_sad_c(MACROBLOCK *x, return INT_MAX; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, center_mv, mvjcost, - mvcost, x->errorperbit); + (unsigned int *)(&thissad)) + + mv_err_cost(&this_mv.as_mv, &center_mv->as_mv, + mvjcost, mvcost, x->errorperbit); } int vp9_diamond_search_sadx4(MACROBLOCK *x, @@ -1265,10 +1259,9 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, best_address = in_what; // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, - in_what, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); // search_param determines the length of the initial step and hence the number of iterations // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
@@ -1303,7 +1296,7 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, if (sad_array[t] < bestsad) { this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; - sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, + sad_array[t] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, mvjsadcost, mvsadcost, sad_per_bit); if (sad_array[t] < bestsad) { @@ -1327,7 +1320,7 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1358,7 +1351,7 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1383,8 +1376,9 @@ int vp9_diamond_search_sadx4(MACROBLOCK *x, return INT_MAX; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + mv_err_cost(&this_mv, - center_mv, mvjcost, mvcost, x->errorperbit); + (unsigned int *)(&thissad)) + + mv_err_cost(&this_mv.as_mv, &center_mv->as_mv, + mvjcost, mvcost, x->errorperbit); } /* do_refine: If last step (1-away) of n-step search doesn't pick the center @@ -1495,8 +1489,8 @@ int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); + + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch // beyond the UMV
border @@ -1513,8 +1507,8 @@ int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride, bestsad); this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1531,10 +1525,10 @@ int vp9_full_search_sad_c(MACROBLOCK *x, int_mv *ref_mv, this_mv.as_mv.col = best_mv->as_mv.col * 8; if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, + (unsigned int *)(&thissad)) + + mv_err_cost(&this_mv.as_mv, &center_mv->as_mv, + mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -1585,8 +1579,8 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); + + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch // beyond the UMV border @@ -1610,8 +1604,8 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1631,7 +1625,7 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + thissad += mvsad_err_cost(&this_mv.as_mv,
&fcenter_mv.as_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1652,10 +1646,10 @@ int vp9_full_search_sadx3(MACROBLOCK *x, int_mv *ref_mv, this_mv.as_mv.col = best_mv->as_mv.col * 8; if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, + (unsigned int *)(&thissad)) + + mv_err_cost(&this_mv.as_mv, &center_mv->as_mv, + mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -1708,8 +1702,8 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, // Baseline value at the centre bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) - + mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, - sad_per_bit); + + mvsad_err_cost(&best_mv->as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); // Apply further limits to prevent us looking using vectors that stretch // beyond the UMV border @@ -1733,8 +1727,8 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1759,7 +1753,7 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { @@ -1780,8 +1774,8 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, if (thissad < bestsad) { this_mv.as_mv.col = c; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, - mvjsadcost, mvsadcost, sad_per_bit); + thissad += mvsad_err_cost(&this_mv.as_mv,
&fcenter_mv.as_mv, + mvjsadcost, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1800,10 +1794,10 @@ int vp9_full_search_sadx8(MACROBLOCK *x, int_mv *ref_mv, this_mv.as_mv.col = best_mv->as_mv.col * 8; if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, + (unsigned int *)(&thissad)) + + mv_err_cost(&this_mv.as_mv, &center_mv->as_mv, + mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -1834,8 +1828,10 @@ int vp9_refining_search_sad_c(MACROBLOCK *x, fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, best_address, + in_what_stride, 0x7fffffff) + + mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, error_per_bit); for (i = 0; i < search_range; i++) { int best_site = -1; @@ -1852,8 +1848,8 @@ int vp9_refining_search_sad_c(MACROBLOCK *x, if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, - mvsadcost, error_per_bit); + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, error_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1876,10 +1872,10 @@ int vp9_refining_search_sad_c(MACROBLOCK *x, this_mv.as_mv.col = ref_mv->as_mv.col * 8; if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, + (unsigned int
*)(&thissad)) + + mv_err_cost(&this_mv.as_mv, &center_mv->as_mv, + mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -1911,8 +1907,10 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; - bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + bestsad = fn_ptr->sdf(what, what_stride, best_address, + in_what_stride, 0x7fffffff) + + mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, error_per_bit); for (i = 0; i < search_range; i++) { int best_site = -1; @@ -1935,8 +1933,8 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, if (sad_array[j] < bestsad) { this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; - sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, - mvsadcost, error_per_bit); + sad_array[j] += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, error_per_bit); if (sad_array[j] < bestsad) { bestsad = sad_array[j]; @@ -1957,8 +1955,8 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, - mvsadcost, error_per_bit); + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, error_per_bit); if (thissad < bestsad) { bestsad = thissad; @@ -1982,10 +1980,10 @@ int vp9_refining_search_sadx4(MACROBLOCK *x, this_mv.as_mv.col = ref_mv->as_mv.col * 8; if (bestsad < INT_MAX) - return - fn_ptr->vf(what, what_stride, best_address, in_what_stride, - (unsigned int *)(&thissad)) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); + return fn_ptr->vf(what, what_stride, best_address, in_what_stride, + (unsigned int *)(&thissad)) + +
mv_err_cost(&this_mv.as_mv, &center_mv->as_mv, + mvjcost, mvcost, x->errorperbit); else return INT_MAX; } @@ -2025,7 +2023,8 @@ int vp9_refining_search_8p_c(MACROBLOCK *x, /* Get compound pred by averaging two pred blocks. */ bestsad = fn_ptr->sdaf(what, what_stride, best_address, in_what_stride, second_pred, 0x7fffffff) + - mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit); + mvsad_err_cost(&ref_mv->as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, error_per_bit); for (i = 0; i < search_range; i++) { int best_site = -1; @@ -2048,9 +2047,8 @@ int vp9_refining_search_8p_c(MACROBLOCK *x, if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; - thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvjsadcost, - mvsadcost, error_per_bit); - + thissad += mvsad_err_cost(&this_mv.as_mv, &fcenter_mv.as_mv, + mvjsadcost, mvsadcost, error_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = j; @@ -2075,10 +2073,10 @@ int vp9_refining_search_8p_c(MACROBLOCK *x, if (bestsad < INT_MAX) { // FIXME(rbultje, yunqing): add full-pixel averaging variance functions // so we don't have to use the subpixel with xoff=0,yoff=0 here.
- return fn_ptr->svaf(best_address, in_what_stride, 0, 0, - what, what_stride, (unsigned int *)(&thissad), - second_pred) + - mv_err_cost(&this_mv, center_mv, mvjcost, mvcost, x->errorperbit); + return fn_ptr->svaf(best_address, in_what_stride, 0, 0, what, what_stride, + (unsigned int *)(&thissad), second_pred) + + mv_err_cost(&this_mv.as_mv, &center_mv->as_mv, + mvjcost, mvcost, x->errorperbit); } else { return INT_MAX; } diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 3598fa09a..77c157c5b 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -22,10 +22,14 @@ #define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) // Maximum size of the first step in full pel units #define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) +// Allowed motion vector pixel distance outside image border +// for Block_16x16 +#define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND) + void vp9_clamp_mv_min_max(MACROBLOCK *x, MV *mv); -int vp9_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvjcost, - int *mvcost[2], int weight); +int vp9_mv_bit_cost(const MV *mv, const MV *ref, + const int *mvjcost, int *mvcost[2], int weight); void vp9_init_dsmotion_compensation(MACROBLOCK *x, int stride); void vp9_init3smotion_compensation(MACROBLOCK *x, int stride); @@ -40,37 +44,36 @@ int vp9_full_pixel_diamond(struct VP9_COMP *cpi, MACROBLOCK *x, int_mv *ref_mv, int_mv *dst_mv); int vp9_hex_search(MACROBLOCK *x, - int_mv *ref_mv, + MV *ref_mv, int search_param, int error_per_bit, int do_init_search, const vp9_variance_fn_ptr_t *vf, int use_mvcost, - int_mv *center_mv, - int_mv *best_mv); + const MV *center_mv, + MV *best_mv); int vp9_bigdia_search(MACROBLOCK *x, - int_mv *ref_mv, + MV *ref_mv, int search_param, int error_per_bit, int do_init_search, const vp9_variance_fn_ptr_t *vf, int use_mvcost, - int_mv *center_mv, - int_mv *best_mv); + const MV *center_mv, + MV *best_mv); int vp9_square_search(MACROBLOCK *x, - int_mv *ref_mv, + MV *ref_mv, int search_param, int
error_per_bit, int do_init_search, const vp9_variance_fn_ptr_t *vf, int use_mvcost, - int_mv *center_mv, - int_mv *best_mv); + const MV *center_mv, + MV *best_mv); typedef int (fractional_mv_step_fp) ( MACROBLOCK *x, - int_mv *bestmv, - int_mv *ref_mv, + MV *bestmv, const MV *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, // 0 - full, 1 - qtr only, 2 - half only @@ -84,7 +87,7 @@ extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree; typedef int (fractional_mv_step_comp_fp) ( MACROBLOCK *x, - int_mv *bestmv, int_mv *ref_mv, + MV *bestmv, const MV *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, // 0 - full, 1 - qtr only, 2 - half only diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 9b20dafde..a106014f8 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -61,16 +61,11 @@ #define INTRA_ZBIN_BOOST 0 typedef struct { - nmv_context nmvc; int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; int nmvcosts_hp[2][MV_VALS]; vp9_prob segment_pred_probs[PREDICTION_PROBS]; - vp9_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; - vp9_prob comp_inter_prob[COMP_INTER_CONTEXTS]; - vp9_prob single_ref_prob[REF_CONTEXTS][2]; - vp9_prob comp_ref_prob[REF_CONTEXTS]; unsigned char *last_frame_seg_map_copy; @@ -79,20 +74,8 @@ typedef struct { // 0 = ZERO_MV, MV signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; - vp9_coeff_probs_model coef_probs[TX_SIZES][BLOCK_TYPES]; - - vp9_prob y_mode_prob[4][INTRA_MODES - 1]; - vp9_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; - vp9_prob partition_prob[2][NUM_PARTITION_CONTEXTS][PARTITION_TYPES - 1]; - - vp9_prob switchable_interp_prob[SWITCHABLE_FILTERS + 1] - [SWITCHABLE_FILTERS - 1]; - int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2]; - vp9_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; - - struct tx_probs tx_probs; - vp9_prob mbskip_probs[MBSKIP_CONTEXTS]; + FRAME_CONTEXT fc; } CODING_CONTEXT; typedef 
struct { @@ -649,7 +632,7 @@ typedef struct VP9_COMP { unsigned int switchable_interp_count[SWITCHABLE_FILTERS + 1] [SWITCHABLE_FILTERS]; - unsigned int txfm_stepdown_count[TX_SIZES]; + unsigned int tx_stepdown_count[TX_SIZES]; int initial_width; int initial_height; @@ -712,9 +695,8 @@ void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x); void vp9_set_speed_features(VP9_COMP *cpi); -extern int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest); +int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); -extern void vp9_alloc_compressor_data(VP9_COMP *cpi); +void vp9_alloc_compressor_data(VP9_COMP *cpi); #endif // VP9_ENCODER_VP9_ONYX_INT_H_ diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 6c8b2a04b..05e893ee9 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -337,10 +337,10 @@ void vp9_frame_init_quantizer(VP9_COMP *cpi) { vp9_mb_init_quantizer(cpi, &cpi->mb); } -void vp9_set_quantizer(struct VP9_COMP *cpi, int Q) { +void vp9_set_quantizer(struct VP9_COMP *cpi, int q) { VP9_COMMON *cm = &cpi->common; - cm->base_qindex = Q; + cm->base_qindex = q; // if any of the delta_q values are changing update flag will // have to be set. 
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index 3229eaad2..3191c49ae 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -30,14 +30,14 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, int y_blocks); struct VP9_COMP; -extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q); +void vp9_set_quantizer(struct VP9_COMP *cpi, int q); -extern void vp9_frame_init_quantizer(struct VP9_COMP *cpi); +void vp9_frame_init_quantizer(struct VP9_COMP *cpi); -extern void vp9_update_zbin_extra(struct VP9_COMP *cpi, MACROBLOCK *x); +void vp9_update_zbin_extra(struct VP9_COMP *cpi, MACROBLOCK *x); -extern void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x); +void vp9_mb_init_quantizer(struct VP9_COMP *cpi, MACROBLOCK *x); -extern void vp9_init_quantizer(struct VP9_COMP *cpi); +void vp9_init_quantizer(struct VP9_COMP *cpi); #endif // VP9_ENCODER_VP9_QUANTIZE_H_ diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 2d12ba94f..bbcad172d 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -76,35 +76,19 @@ void vp9_save_coding_context(VP9_COMP *cpi) { // restored with a call to vp9_restore_coding_context. These functions are // intended for use in a re-code loop in vp9_compress_frame where the // quantizer value is adjusted between loop iterations. 
- - cc->nmvc = cm->fc.nmvc; vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); vp9_copy(cc->nmvcosts, cpi->mb.nmvcosts); vp9_copy(cc->nmvcosts_hp, cpi->mb.nmvcosts_hp); - vp9_copy(cc->inter_mode_probs, cm->fc.inter_mode_probs); - - vp9_copy(cc->y_mode_prob, cm->fc.y_mode_prob); - vp9_copy(cc->uv_mode_prob, cm->fc.uv_mode_prob); - vp9_copy(cc->partition_prob, cm->fc.partition_prob); - vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); - vp9_copy(cc->intra_inter_prob, cm->fc.intra_inter_prob); - vp9_copy(cc->comp_inter_prob, cm->fc.comp_inter_prob); - vp9_copy(cc->single_ref_prob, cm->fc.single_ref_prob); - vp9_copy(cc->comp_ref_prob, cm->fc.comp_ref_prob); - vpx_memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); - vp9_copy(cc->coef_probs, cm->fc.coef_probs); - vp9_copy(cc->switchable_interp_prob, cm->fc.switchable_interp_prob); - cc->tx_probs = cm->fc.tx_probs; - vp9_copy(cc->mbskip_probs, cm->fc.mbskip_probs); + cc->fc = cm->fc; } void vp9_restore_coding_context(VP9_COMP *cpi) { @@ -113,25 +97,12 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { // Restore key state variables to the snapshot state stored in the // previous call to vp9_save_coding_context. 
- - cm->fc.nmvc = cc->nmvc; vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); vp9_copy(cpi->mb.nmvcosts, cc->nmvcosts); vp9_copy(cpi->mb.nmvcosts_hp, cc->nmvcosts_hp); - vp9_copy(cm->fc.inter_mode_probs, cc->inter_mode_probs); - - vp9_copy(cm->fc.y_mode_prob, cc->y_mode_prob); - vp9_copy(cm->fc.uv_mode_prob, cc->uv_mode_prob); - vp9_copy(cm->fc.partition_prob, cc->partition_prob); - vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); - vp9_copy(cm->fc.intra_inter_prob, cc->intra_inter_prob); - vp9_copy(cm->fc.comp_inter_prob, cc->comp_inter_prob); - vp9_copy(cm->fc.single_ref_prob, cc->single_ref_prob); - vp9_copy(cm->fc.comp_ref_prob, cc->comp_ref_prob); - vpx_memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy, (cm->mi_rows * cm->mi_cols)); @@ -139,10 +110,7 @@ void vp9_restore_coding_context(VP9_COMP *cpi) { vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); - vp9_copy(cm->fc.coef_probs, cc->coef_probs); - vp9_copy(cm->fc.switchable_interp_prob, cc->switchable_interp_prob); - cm->fc.tx_probs = cc->tx_probs; - vp9_copy(cm->fc.mbskip_probs, cc->mbskip_probs); + cm->fc = cc->fc; } void vp9_setup_key_frame(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 473317605..ddda7130c 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -32,8 +32,8 @@ int vp9_pick_frame_size(VP9_COMP *cpi); double vp9_convert_qindex_to_q(int qindex); int vp9_gfboost_qadjust(int qindex); -extern int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex, - double correction_factor); +int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex, + double correction_factor); void vp9_setup_inter_frame(VP9_COMP *cpi); #endif // VP9_ENCODER_VP9_RATECTRL_H_ diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 3ef3eeeeb..83cd61226 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -186,6 +186,7 @@ void 
vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { // cpi->common.refresh_alt_ref_frame) qindex = clamp(qindex, 0, MAXQ); + cpi->RDDIV = 100; cpi->RDMULT = compute_rd_mult(qindex); if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { if (cpi->twopass.next_iiratio > 31) @@ -204,42 +205,18 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) { if (q < 8) q = 8; - if (cpi->RDMULT > 1000) { - cpi->RDDIV = 1; - cpi->RDMULT /= 100; + for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { + for (i = 0; i < MAX_MODES; i++) { + // Threshold here seem unecessarily harsh but fine given actual + // range of values used for cpi->sf.thresh_mult[] + int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); - for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { - for (i = 0; i < MAX_MODES; ++i) { - // Threshold here seem unecessarily harsh but fine given actual - // range of values used for cpi->sf.thresh_mult[] - int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); - - // *4 relates to the scaling of rd_thresh_block_size_factor[] - if ((int64_t)cpi->sf.thresh_mult[i] < thresh_max) { - cpi->rd_threshes[bsize][i] = - cpi->sf.thresh_mult[i] * q * - rd_thresh_block_size_factor[bsize] / (4 * 100); - } else { - cpi->rd_threshes[bsize][i] = INT_MAX; - } - } - } - } else { - cpi->RDDIV = 100; - - for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { - for (i = 0; i < MAX_MODES; i++) { - // Threshold here seem unecessarily harsh but fine given actual - // range of values used for cpi->sf.thresh_mult[] - int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]); - - if (cpi->sf.thresh_mult[i] < thresh_max) { - cpi->rd_threshes[bsize][i] = + if (cpi->sf.thresh_mult[i] < thresh_max) { + cpi->rd_threshes[bsize][i] = cpi->sf.thresh_mult[i] * q * rd_thresh_block_size_factor[bsize] / 4; - } else { - cpi->rd_threshes[bsize][i] = INT_MAX; - } + } else { + cpi->rd_threshes[bsize][i] = INT_MAX; } } } @@ -554,9 +531,13 @@ struct rdcost_block_args { TX_SIZE 
tx_size; int bw; int bh; - int rate; - int64_t dist; - int64_t sse; + int rate[256]; + int64_t dist[256]; + int64_t sse[256]; + int this_rate; + int64_t this_dist; + int64_t this_sse; + int64_t this_rd; int64_t best_rd; int skip; const int16_t *scan, *nb; @@ -573,17 +554,17 @@ static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) { int shift = args->tx_size == TX_32X32 ? 0 : 2; int16_t *const coeff = BLOCK_OFFSET(p->coeff, block); int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - args->dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, + args->dist[block] = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >> shift; - args->sse += this_sse >> shift; + args->sse[block] = this_sse >> shift; if (x->skip_encode && xd->this_mi->mbmi.ref_frame[0] == INTRA_FRAME) { // TODO(jingning): tune the model to better capture the distortion. int64_t p = (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> shift; - args->dist += p; - args->sse += p; + args->dist[block] = p; + args->sse[block] = p; } } @@ -594,10 +575,10 @@ static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize, int x_idx, y_idx; txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx); - args->rate += cost_coeffs(args->x, plane, block, - args->t_above + x_idx, - args->t_left + y_idx, args->tx_size, - args->scan, args->nb); + args->rate[block] = cost_coeffs(args->x, plane, block, + args->t_above + x_idx, + args->t_left + y_idx, args->tx_size, + args->scan, args->nb); } static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, @@ -610,16 +591,6 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, if (args->skip) return; - rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist); - rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse); - rd = MIN(rd1, rd2); - if (rd > args->best_rd) { - args->skip = 1; - args->rate = INT_MAX; - args->dist = INT64_MAX; - args->sse = INT64_MAX; - return; - } if 
(!is_inter_block(&xd->this_mi->mbmi)) vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args); @@ -628,6 +599,56 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, dist_block(plane, block, tx_size, args); rate_block(plane, block, plane_bsize, tx_size, args); + rd1 = RDCOST(x->rdmult, x->rddiv, args->rate[block], args->dist[block]); + rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse[block]); + + // TODO(jingning): temporarily enabled only for luma component + rd = MIN(rd1, rd2); + if (plane == 0) + x->zcoeff_blk[tx_size][block] = rd1 > rd2; + + args->this_rate += args->rate[block]; + args->this_dist += args->dist[block]; + args->this_sse += args->sse[block]; + args->this_rd += rd; + + if (args->this_rd > args->best_rd) { + args->skip = 1; + return; + } +} + +void vp9_get_entropy_contexts(TX_SIZE tx_size, + ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16], + const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left, + int num_4x4_w, int num_4x4_h) { + int i; + switch (tx_size) { + case TX_4X4: + vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); + vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h); + break; + case TX_8X8: + for (i = 0; i < num_4x4_w; i += 2) + t_above[i] = !!*(const uint16_t *)&above[i]; + for (i = 0; i < num_4x4_h; i += 2) + t_left[i] = !!*(const uint16_t *)&left[i]; + break; + case TX_16X16: + for (i = 0; i < num_4x4_w; i += 4) + t_above[i] = !!*(const uint32_t *)&above[i]; + for (i = 0; i < num_4x4_h; i += 4) + t_left[i] = !!*(const uint32_t *)&left[i]; + break; + case TX_32X32: + for (i = 0; i < num_4x4_w; i += 8) + t_above[i] = !!*(const uint64_t *)&above[i]; + for (i = 0; i < num_4x4_h; i += 8) + t_left[i] = !!*(const uint64_t *)&left[i]; + break; + default: + assert(!"Invalid transform size."); + } } static void txfm_rd_in_plane(MACROBLOCK *x, @@ -638,45 +659,33 @@ static void txfm_rd_in_plane(MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; struct macroblockd_plane 
*const pd = &xd->plane[plane]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs]; - int i; + const int num_4x4_w = num_4x4_blocks_wide_lookup[bs]; + const int num_4x4_h = num_4x4_blocks_high_lookup[bs]; + struct rdcost_block_args args = { x, { 0 }, { 0 }, tx_size, - num_4x4_blocks_wide, num_4x4_blocks_high, - 0, 0, 0, ref_best_rd, 0 }; + num_4x4_w, num_4x4_h, + { 0 }, { 0 }, { 0 }, + 0, 0, 0, 0, ref_best_rd, 0 }; if (plane == 0) xd->this_mi->mbmi.tx_size = tx_size; + vp9_get_entropy_contexts(tx_size, args.t_above, args.t_left, + pd->above_context, pd->left_context, + num_4x4_w, num_4x4_h); switch (tx_size) { case TX_4X4: - vpx_memcpy(&args.t_above, pd->above_context, - sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide); - vpx_memcpy(&args.t_left, pd->left_context, - sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high); get_scan_nb_4x4(get_tx_type_4x4(pd->plane_type, xd, 0), &args.scan, &args.nb); break; case TX_8X8: - for (i = 0; i < num_4x4_blocks_wide; i += 2) - args.t_above[i] = !!*(uint16_t *)&pd->above_context[i]; - for (i = 0; i < num_4x4_blocks_high; i += 2) - args.t_left[i] = !!*(uint16_t *)&pd->left_context[i]; get_scan_nb_8x8(get_tx_type_8x8(pd->plane_type, xd), &args.scan, &args.nb); break; case TX_16X16: - for (i = 0; i < num_4x4_blocks_wide; i += 4) - args.t_above[i] = !!*(uint32_t *)&pd->above_context[i]; - for (i = 0; i < num_4x4_blocks_high; i += 4) - args.t_left[i] = !!*(uint32_t *)&pd->left_context[i]; get_scan_nb_16x16(get_tx_type_16x16(pd->plane_type, xd), &args.scan, &args.nb); break; case TX_32X32: - for (i = 0; i < num_4x4_blocks_wide; i += 8) - args.t_above[i] = !!*(uint64_t *)&pd->above_context[i]; - for (i = 0; i < num_4x4_blocks_high; i += 8) - args.t_left[i] = !!*(uint64_t *)&pd->left_context[i]; args.scan = vp9_default_scan_32x32; args.nb = vp9_default_scan_32x32_neighbors; break; @@ -685,10 +694,17 @@ static 
void txfm_rd_in_plane(MACROBLOCK *x, } foreach_transformed_block_in_plane(xd, bsize, plane, block_yrd_txfm, &args); - *distortion = args.dist; - *rate = args.rate; - *sse = args.sse; - *skippable = vp9_is_skippable_in_plane(xd, bsize, plane) && (!args.skip); + if (args.skip) { + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } else { + *distortion = args.this_dist; + *rate = args.this_rate; + *sse = args.this_sse; + *skippable = vp9_is_skippable_in_plane(xd, bsize, plane); + } } static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, @@ -696,15 +712,15 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, int *skip, int64_t *sse, int64_t ref_best_rd, BLOCK_SIZE bs) { - const TX_SIZE max_txfm_size = max_txsize_lookup[bs]; + const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi; - if (max_txfm_size == TX_32X32 && + if (max_tx_size == TX_32X32 && (cm->tx_mode == ALLOW_32X32 || cm->tx_mode == TX_MODE_SELECT)) { mbmi->tx_size = TX_32X32; - } else if (max_txfm_size >= TX_16X16 && + } else if (max_tx_size >= TX_16X16 && (cm->tx_mode == ALLOW_16X16 || cm->tx_mode == ALLOW_32X32 || cm->tx_mode == TX_MODE_SELECT)) { @@ -717,7 +733,7 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x, txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size); - cpi->txfm_stepdown_count[0]++; + cpi->tx_stepdown_count[0]++; } static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, @@ -811,15 +827,15 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] && rd[TX_32X32][1] < rd[TX_4X4][1]) { - cpi->txfm_stepdown_count[0]++; + cpi->tx_stepdown_count[0]++; } else if (max_tx_size >= TX_16X16 && rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1]) { - 
cpi->txfm_stepdown_count[max_tx_size - TX_16X16]++; + cpi->tx_stepdown_count[max_tx_size - TX_16X16]++; } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) { - cpi->txfm_stepdown_count[max_tx_size - TX_8X8]++; + cpi->tx_stepdown_count[max_tx_size - TX_8X8]++; } else { - cpi->txfm_stepdown_count[max_tx_size - TX_4X4]++; + cpi->tx_stepdown_count[max_tx_size - TX_4X4]++; } } @@ -829,7 +845,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, int *s, int *skip, int64_t *sse, int64_t ref_best_rd, BLOCK_SIZE bs) { - const TX_SIZE max_txfm_size = max_txsize_lookup[bs]; + const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi; @@ -845,9 +861,9 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, // for (n = TX_4X4; n <= max_txfm_size; n++) // r[n][0] = (r[n][0] * scale_r[n]); - for (n = TX_4X4; n <= max_txfm_size; n++) { + for (n = TX_4X4; n <= max_tx_size; n++) { r[n][1] = r[n][0]; - for (m = 0; m <= n - (n == max_txfm_size); m++) { + for (m = 0; m <= n - (n == max_tx_size); m++) { if (m == n) r[n][1] += vp9_cost_zero(tx_probs[m]); else @@ -859,7 +875,7 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); - for (n = TX_4X4; n <= max_txfm_size; n++) { + for (n = TX_4X4; n <= max_tx_size; n++) { if (s[n]) { rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]); } else { @@ -867,19 +883,19 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } } - for (n = TX_4X4; n <= max_txfm_size; n++) { + for (n = TX_4X4; n <= max_tx_size; n++) { rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]); rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]); } - if (max_txfm_size == TX_32X32 && + if (max_tx_size == TX_32X32 && (cm->tx_mode == ALLOW_32X32 || (cm->tx_mode == TX_MODE_SELECT 
&& rd[TX_32X32][1] <= rd[TX_16X16][1] && rd[TX_32X32][1] <= rd[TX_8X8][1] && rd[TX_32X32][1] <= rd[TX_4X4][1]))) { mbmi->tx_size = TX_32X32; - } else if (max_txfm_size >= TX_16X16 && + } else if (max_tx_size >= TX_16X16 && (cm->tx_mode == ALLOW_16X16 || cm->tx_mode == ALLOW_32X32 || (cm->tx_mode == TX_MODE_SELECT && @@ -901,19 +917,19 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x, txfm_rd_in_plane(x, rate, distortion, skip, &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size); - if (max_txfm_size == TX_32X32 && + if (max_tx_size == TX_32X32 && rd[TX_32X32][1] <= rd[TX_16X16][1] && rd[TX_32X32][1] <= rd[TX_8X8][1] && rd[TX_32X32][1] <= rd[TX_4X4][1]) { - cpi->txfm_stepdown_count[0]++; - } else if (max_txfm_size >= TX_16X16 && + cpi->tx_stepdown_count[0]++; + } else if (max_tx_size >= TX_16X16 && rd[TX_16X16][1] <= rd[TX_8X8][1] && rd[TX_16X16][1] <= rd[TX_4X4][1]) { - cpi->txfm_stepdown_count[max_txfm_size - TX_16X16]++; + cpi->tx_stepdown_count[max_tx_size - TX_16X16]++; } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) { - cpi->txfm_stepdown_count[max_txfm_size - TX_8X8]++; + cpi->tx_stepdown_count[max_tx_size - TX_8X8]++; } else { - cpi->txfm_stepdown_count[max_txfm_size - TX_4X4]++; + cpi->tx_stepdown_count[max_tx_size - TX_4X4]++; } } @@ -1058,6 +1074,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { int64_t ssz; const int16_t *scan; + const int16_t *nb; uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride; uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride; @@ -1083,10 +1100,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, x->quantize_b_4x4(x, block, tx_type, 16); } - scan = get_scan_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block)); + get_scan_nb_4x4(get_tx_type_4x4(PLANE_TYPE_Y_WITH_DC, xd, block), + &scan, &nb); ratey += cost_coeffs(x, 0, block, - tempa + idx, templ + idy, TX_4X4, scan, - 
vp9_get_coef_neighbors_handle(scan)); + tempa + idx, templ + idy, TX_4X4, scan, nb); distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &ssz) >> 2; if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) @@ -1458,11 +1475,12 @@ static int labels2mode(MACROBLOCK *x, int i, switch (m = this_mode) { case NEWMV: this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int; - thismvcost = vp9_mv_bit_cost(this_mv, best_ref_mv, mvjcost, mvcost, - 102); + thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv, + mvjcost, mvcost, 102); if (has_second_rf) { this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int; - thismvcost += vp9_mv_bit_cost(this_second_mv, second_best_ref_mv, + thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv, + &second_best_ref_mv->as_mv, mvjcost, mvcost, 102); } break; @@ -1796,20 +1814,23 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, // adjust src pointer for this block mi_buf_shift(x, i); if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full, + bestsme = vp9_hex_search(x, &mvp_full.as_mv, step_param, sadpb, 1, v_fn_ptr, 1, - bsi->ref_mv, &mode_mv[NEWMV]); + &bsi->ref_mv->as_mv, + &mode_mv[NEWMV].as_mv); } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full, + bestsme = vp9_square_search(x, &mvp_full.as_mv, step_param, sadpb, 1, v_fn_ptr, 1, - bsi->ref_mv, &mode_mv[NEWMV]); + &bsi->ref_mv->as_mv, + &mode_mv[NEWMV].as_mv); } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full, + bestsme = vp9_bigdia_search(x, &mvp_full.as_mv, step_param, sadpb, 1, v_fn_ptr, 1, - bsi->ref_mv, &mode_mv[NEWMV]); + &bsi->ref_mv->as_mv, + &mode_mv[NEWMV].as_mv); } else { bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 0, v_fn_ptr, @@ -1840,8 +1861,10 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int distortion; unsigned int sse; - 
cpi->find_fractional_mv_step(x, &mode_mv[NEWMV], - bsi->ref_mv, x->errorperbit, v_fn_ptr, + cpi->find_fractional_mv_step(x, + &mode_mv[NEWMV].as_mv, + &bsi->ref_mv->as_mv, + x->errorperbit, v_fn_ptr, 0, cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &distortion, &sse); @@ -2220,11 +2243,12 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY]; ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION]; - // FIXME(rbultje) does this memcpy the whole array? I believe sizeof() - // doesn't actually work this way - memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); - memcpy(ctx->best_filter_diff, best_filter_diff, - sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1)); + vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[xd->this_mi->mbmi.tx_size], + sizeof(ctx->zcoeff_blk)); + + vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff)); + vpx_memcpy(ctx->best_filter_diff, best_filter_diff, + sizeof(*best_filter_diff) * (SWITCHABLE_FILTERS + 1)); } static void setup_pred_block(const MACROBLOCKD *xd, @@ -2403,23 +2427,23 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; if (cpi->sf.search_method == HEX) { - bestsme = vp9_hex_search(x, &mvp_full, + bestsme = vp9_hex_search(x, &mvp_full.as_mv, step_param, sadpb, 1, &cpi->fn_ptr[block_size], 1, - &ref_mv, tmp_mv); + &ref_mv.as_mv, &tmp_mv->as_mv); } else if (cpi->sf.search_method == SQUARE) { - bestsme = vp9_square_search(x, &mvp_full, + bestsme = vp9_square_search(x, &mvp_full.as_mv, step_param, sadpb, 1, &cpi->fn_ptr[block_size], 1, - &ref_mv, tmp_mv); + &ref_mv.as_mv, &tmp_mv->as_mv); } else if (cpi->sf.search_method == BIGDIA) { - bestsme = vp9_bigdia_search(x, &mvp_full, + bestsme = vp9_bigdia_search(x, &mvp_full.as_mv, step_param, sadpb, 1, &cpi->fn_ptr[block_size], 1, - &ref_mv, tmp_mv); + &ref_mv.as_mv, &tmp_mv->as_mv); } 
else { bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param, sadpb, further_steps, 1, @@ -2435,16 +2459,15 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; - cpi->find_fractional_mv_step(x, tmp_mv, &ref_mv, + cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv, x->errorperbit, &cpi->fn_ptr[block_size], 0, cpi->sf.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis, &sse); } - *rate_mv = vp9_mv_bit_cost(tmp_mv, &ref_mv, - x->nmvjointcost, x->mvcost, - 96); + *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv, + x->nmvjointcost, x->mvcost, 96); if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) x->pred_mv[ref].as_int = tmp_mv->as_int; @@ -2570,8 +2593,8 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, unsigned int sse; bestsme = cpi->find_fractional_mv_step_comp( - x, &tmp_mv, - &ref_mv[id], + x, &tmp_mv.as_mv, + &ref_mv[id].as_mv, x->errorperbit, &cpi->fn_ptr[block_size], 0, cpi->sf.subpel_iters_per_step, @@ -2603,11 +2626,11 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[1] = backup_second_yv12[i]; } - *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], - &mbmi->ref_mvs[refs[0]][0], + *rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv, + &mbmi->ref_mvs[refs[0]][0].as_mv, x->nmvjointcost, x->mvcost, 96); - *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], - &mbmi->ref_mvs[refs[1]][0], + *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv, + &mbmi->ref_mvs[refs[1]][0].as_mv, x->nmvjointcost, x->mvcost, 96); vpx_free(second_pred); @@ -2630,7 +2653,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->this_mi->mbmi; - const int is_comp_pred = (mbmi->ref_frame[1] > 0); + const int is_comp_pred = has_second_ref(mbmi); const int 
num_refs = is_comp_pred ? 2 : 1; const int this_mode = mbmi->mode; int_mv *frame_mv = mode_mv[this_mode]; @@ -2659,11 +2682,11 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, single_newmv, &rate_mv); } else { - rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]], - &mbmi->ref_mvs[refs[0]][0], + rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv, + &mbmi->ref_mvs[refs[0]][0].as_mv, x->nmvjointcost, x->mvcost, 96); - rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]], - &mbmi->ref_mvs[refs[1]][0], + rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv, + &mbmi->ref_mvs[refs[1]][0].as_mv, x->nmvjointcost, x->mvcost, 96); } if (frame_mv[refs[0]].as_int == INVALID_MV || @@ -3071,8 +3094,12 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_cost_bit(vp9_get_pred_prob_mbskip(cm, xd), 0); *returndist = dist_y + dist_uv; if (cpi->sf.tx_size_search_method == USE_FULL_RD) - for (i = 0; i < TX_MODES; i++) - ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode]; + for (i = 0; i < TX_MODES; i++) { + if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX) + ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode]; + else + ctx->tx_rd_diff[i] = 0; + } } ctx->mic = *xd->this_mi; @@ -3139,8 +3166,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, const int bws = num_8x8_blocks_wide_lookup[bsize] / 2; const int bhs = num_8x8_blocks_high_lookup[bsize] / 2; int best_skip2 = 0; + unsigned char best_zcoeff_blk[256] = { 0 }; x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH; + vpx_memset(x->zcoeff_blk, 0, sizeof(x->zcoeff_blk)); + vpx_memset(ctx->zcoeff_blk, 0, sizeof(ctx->zcoeff_blk)); for (i = 0; i < 4; i++) { int j; @@ -3812,6 +3842,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, best_mbmode = *mbmi; best_skip2 = this_skip2; best_partition = *x->partition_info; + vpx_memcpy(best_zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], + 
sizeof(best_zcoeff_blk)); if (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV) for (i = 0; i < 4; i++) @@ -3993,13 +4025,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, if (best_mbmode.ref_frame[0] != INTRA_FRAME && best_mbmode.sb_type < BLOCK_8X8) { for (i = 0; i < 4; i++) - xd->this_mi->bmi[i].as_mv[0].as_int = - best_bmodes[i].as_mv[0].as_int; + xd->this_mi->bmi[i].as_mv[0].as_int = best_bmodes[i].as_mv[0].as_int; - if (mbmi->ref_frame[1] > 0) + if (has_second_ref(mbmi)) for (i = 0; i < 4; i++) - xd->this_mi->bmi[i].as_mv[1].as_int = - best_bmodes[i].as_mv[1].as_int; + xd->this_mi->bmi[i].as_mv[1].as_int = best_bmodes[i].as_mv[1].as_int; *x->partition_info = best_partition; @@ -4007,6 +4037,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int; } + vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], best_zcoeff_blk, + sizeof(best_zcoeff_blk)); + for (i = 0; i < NB_PREDICTION_TYPES; ++i) { if (best_pred_rd[i] == INT64_MAX) best_pred_diff[i] = INT_MIN; diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index eba7df907..9796c0d7c 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -33,4 +33,9 @@ void vp9_init_me_luts(); void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); +void vp9_get_entropy_contexts(TX_SIZE tx_size, + ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16], + const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left, + int num_4x4_w, int num_4x4_h); + #endif // VP9_ENCODER_VP9_RDOPT_H_ diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index 10655e8a7..874b71ab1 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -130,6 +130,8 @@ static void count_segs(VP9_COMP *cpi, MODE_INFO **mi_8x8, return; segment_id = mi_8x8[0]->mbmi.segment_id; + xd->mi_8x8 = mi_8x8; + xd->this_mi = mi_8x8[0]; set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw); diff --git 
a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 63826eea5..1768b5bed 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -154,10 +154,10 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, // TODO Check that the 16x16 vf & sdf are selected here // Ignore mv costing by sending NULL pointer instead of cost arrays ref_mv = &x->e_mbd.mi_8x8[0]->bmi[0].as_mv[0]; - bestsme = vp9_hex_search(x, &best_ref_mv1_full, + bestsme = vp9_hex_search(x, &best_ref_mv1_full.as_mv, step_param, sadpb, 1, &cpi->fn_ptr[BLOCK_16X16], - 0, &best_ref_mv1, ref_mv); + 0, &best_ref_mv1.as_mv, &ref_mv->as_mv); #if ALT_REF_SUBPEL_ENABLED // Try sub-pixel MC? @@ -166,8 +166,8 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, int distortion; unsigned int sse; // Ignore mv costing by sending NULL pointer instead of cost array - bestsme = cpi->find_fractional_mv_step(x, ref_mv, - &best_ref_mv1, + bestsme = cpi->find_fractional_mv_step(x, &ref_mv->as_mv, + &best_ref_mv1.as_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, cpi->sf.subpel_iters_per_step, diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index a59f6db88..7c14c18aa 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -137,8 +137,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, pt = get_entropy_context(tx_size, pd->above_context + aoff, pd->left_context + loff); - get_scan_and_band(xd, tx_size, type, block, &scan, &band_translate); - nb = vp9_get_coef_neighbors_handle(scan); + get_scan_and_band(xd, tx_size, type, block, &scan, &nb, &band_translate); c = 0; do { const int band = get_coef_band(band_translate, c); diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c index 155ba8a3e..991ef4d29 100644 --- a/vp9/encoder/vp9_variance_c.c +++ b/vp9/encoder/vp9_variance_c.c @@ -8,13 +8,150 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "./vp9_rtcd.h" -#include "vp9/encoder/vp9_variance.h" -#include "vp9/common/vp9_filter.h" -#include "vp9/common/vp9_subpelvar.h" -#include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" -#include "./vp9_rtcd.h" +#include "vpx/vpx_integer.h" + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_convolve.h" +#include "vp9/common/vp9_filter.h" +#include "vp9/encoder/vp9_variance.h" + +static void variance(const uint8_t *src_ptr, + int source_stride, + const uint8_t *ref_ptr, + int recon_stride, + int w, + int h, + unsigned int *sse, + int *sum) { + int i, j; + int diff; + + *sum = 0; + *sse = 0; + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + diff = src_ptr[j] - ref_ptr[j]; + *sum += diff; + *sse += diff * diff; + } + + src_ptr += source_stride; + ref_ptr += recon_stride; + } +} + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil_first_pass + * + * INPUTS : uint8_t *src_ptr : Pointer to source block. + * uint32_t src_pixels_per_line : Stride of input block. + * uint32_t pixel_step : Offset between filter input + * samples (see notes). + * uint32_t output_height : Input block height. + * uint32_t output_width : Input block width. + * int32_t *vp9_filter : Array of 2 bi-linear filter + * taps. + * + * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. + * + * RETURNS : void + * + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in + * either horizontal or vertical direction to produce the + * filtered output block. Used to implement first-pass + * of 2-D separable filter. + * + * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. + * Two filter taps should sum to VP9_FILTER_WEIGHT. + * pixel_step defines whether the filter is applied + * horizontally (pixel_step=1) or vertically (pixel_step= + * stride). + * It defines the offset required to move from one input + * to the next. 
+ * + ****************************************************************************/ +static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, + uint16_t *output_ptr, + unsigned int src_pixels_per_line, + int pixel_step, + unsigned int output_height, + unsigned int output_width, + const int16_t *vp9_filter) { + unsigned int i, j; + + for (i = 0; i < output_height; i++) { + for (j = 0; j < output_width; j++) { + output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + + (int)src_ptr[pixel_step] * vp9_filter[1], + FILTER_BITS); + + src_ptr++; + } + + // Next row... + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_width; + } +} + +/**************************************************************************** + * + * ROUTINE : filter_block2d_bil_second_pass + * + * INPUTS : int32_t *src_ptr : Pointer to source block. + * uint32_t src_pixels_per_line : Stride of input block. + * uint32_t pixel_step : Offset between filter input + * samples (see notes). + * uint32_t output_height : Input block height. + * uint32_t output_width : Input block width. + * int32_t *vp9_filter : Array of 2 bi-linear filter + * taps. + * + * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. + * + * RETURNS : void + * + * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in + * either horizontal or vertical direction to produce the + * filtered output block. Used to implement second-pass + * of 2-D separable filter. + * + * SPECIAL NOTES : Requires 32-bit input as produced by + * filter_block2d_bil_first_pass. + * Two filter taps should sum to VP9_FILTER_WEIGHT. + * pixel_step defines whether the filter is applied + * horizontally (pixel_step=1) or vertically (pixel_step= + * stride). + * It defines the offset required to move from one input + * to the next. 
+ * + ****************************************************************************/ +static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, + uint8_t *output_ptr, + unsigned int src_pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const int16_t *vp9_filter) { + unsigned int i, j; + + for (i = 0; i < output_height; i++) { + for (j = 0; j < output_width; j++) { + output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + + (int)src_ptr[pixel_step] * vp9_filter[1], + FILTER_BITS); + src_ptr++; + } + + src_ptr += src_pixels_per_line - output_width; + output_ptr += output_width; + } +} unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { unsigned int i, sum = 0; |