diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_aq_cyclicrefresh.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_block.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 17 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 73 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 126 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 117 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 90 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.c | 343 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.h | 12 |
12 files changed, 366 insertions, 429 deletions
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 323c10350..d1437d377 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -136,7 +136,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, const int xmis = MIN(cm->mi_cols - mi_col, bw); const int ymis = MIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; - const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd); + const int refresh_this_block = cpi->mb.in_static_area || + candidate_refresh_aq(cr, mbmi, bsize, use_rd); // Default is to not update the refresh map. int new_map_value = cr->map[block_index]; int x = 0; int y = 0; diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index cad124125..fcf2a0420 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -162,6 +162,8 @@ struct macroblock { // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; + int in_static_area; + int optimize; // indicate if it is in the rd search loop or encoding process diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f88ce2d1a..9956acc0b 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1273,6 +1273,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi, static int is_background(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col) { + MACROBLOCK *x = &cpi->mb; uint8_t *src, *pre; int src_stride, pre_stride; @@ -1304,7 +1305,8 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile, threshold = (row8x8_remaining * col8x8_remaining) << 6; } - return (this_sad < 2 * threshold); + x->in_static_area = (this_sad < 2 * threshold); + return x->in_static_area; } static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { @@ -2433,6 +2435,11 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, 
set_offsets(cpi, tile, mi_row, mi_col, bsize); xd->mi[0]->mbmi.sb_type = bsize; + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { + if (xd->mi[0]->mbmi.segment_id && x->in_static_area) + x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); + } + if (!frame_is_intra_only(cm)) { vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize); @@ -2856,7 +2863,8 @@ static void nonrd_use_partition(VP9_COMP *cpi, static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + MACROBLOCK *x = &cpi->mb; + MACROBLOCKD *xd = &x->e_mbd; int mi_col; // Initialize the left context for the new SB row @@ -2874,8 +2882,9 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; BLOCK_SIZE bsize; - cpi->mb.source_variance = UINT_MAX; - vp9_zero(cpi->mb.pred_mv); + x->in_static_area = 0; + x->source_variance = UINT_MAX; + vp9_zero(x->pred_mv); // Set the partition type of the 64X64 block switch (cpi->sf.partition_search_type) { diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index f9dba9689..1dacfaaae 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -61,7 +61,7 @@ #define MIN_GF_INTERVAL 4 #endif -#define DISABLE_RC_LONG_TERM_MEM +#define LONG_TERM_VBR_CORRECTION static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { YV12_BUFFER_CONFIG temp = *a; @@ -1033,6 +1033,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) { reset_fpf_position(twopass, start_pos); } + + // Reset the vbr bits off target counter + cpi->rc.vbr_bits_off_target = 0; } // This function gives an estimate of how badly we believe the prediction @@ -2192,6 +2195,23 @@ void vp9_rc_get_first_pass_params(VP9_COMP *cpi) { cpi->rc.frames_to_key = INT_MAX; } +// For VBR...adjustment to the frame target based on error from previous frames 
+void vbr_rate_correction(int * this_frame_target, + const int64_t vbr_bits_off_target) { + int max_delta = (*this_frame_target * 15) / 100; + + // vbr_bits_off_target > 0 means we have extra bits to spend + if (vbr_bits_off_target > 0) { + *this_frame_target += + (vbr_bits_off_target > max_delta) ? max_delta + : (int)vbr_bits_off_target; + } else { + *this_frame_target -= + (vbr_bits_off_target < -max_delta) ? max_delta + : (int)-vbr_bits_off_target; + } +} + void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -2219,8 +2239,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { return; if (cpi->refresh_alt_ref_frame) { + int modified_target = twopass->gf_bits; + rc->base_frame_target = twopass->gf_bits; cm->frame_type = INTER_FRAME; - vp9_rc_set_frame_target(cpi, twopass->gf_bits); +#ifdef LONG_TERM_VBR_CORRECTION + // Correction to rate target based on prior over or under shoot. + if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) + vbr_rate_correction(&modified_target, rc->vbr_bits_off_target); +#endif + vp9_rc_set_frame_target(cpi, modified_target); return; } @@ -2315,6 +2342,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { target = vp9_rc_clamp_iframe_target_size(cpi, rc->this_frame_target); else target = vp9_rc_clamp_pframe_target_size(cpi, rc->this_frame_target); + + rc->base_frame_target = target; +#ifdef LONG_TERM_VBR_CORRECTION + // Correction to rate target based on prior over or under shoot. + if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) + vbr_rate_correction(&target, rc->vbr_bits_off_target); +#endif vp9_rc_set_frame_target(cpi, target); // Update the total stats remaining structure. 
@@ -2322,20 +2356,45 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } void vp9_twopass_postencode_update(VP9_COMP *cpi) { -#ifdef DISABLE_RC_LONG_TERM_MEM - const uint64_t bits_used = cpi->rc.this_frame_target; + RATE_CONTROL *const rc = &cpi->rc; +#ifdef LONG_TERM_VBR_CORRECTION + // In this experimental mode, the VBR correction is done exclusively through + // rc->vbr_bits_off_target. Based on the sign of this value, a limited % + // adjustment is made to the target rate of subsequent frames, to try and + // push it back towards 0. This mode is less likely to suffer from + // extreme behaviour at the end of a clip or group of frames. + const int bits_used = rc->base_frame_target; + rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size; #else - const uint64_t bits_used = cpi->rc.projected_frame_size; + // In this mode, VBR correction is achieved by altering bits_left, + // kf_group_bits & gf_group_bits to reflect any deviation from the target + // rate in this frame. This alters the allocation of bits to the + // remaining frames in the group / clip. + // + // This method can give rise to unstable behaviour near the end of a clip + // or kf/gf group of frames where any accumulated error is corrected over an + // ever decreasing number of frames. Hence we change the balance of target + // vs. actual bitrate gradually as we progress towards the end of the + // sequence in order to mitigate this effect. + const double progress = + (double)(cpi->twopass.stats_in - cpi->twopass.stats_in_start) / + (cpi->twopass.stats_in_end - cpi->twopass.stats_in_start); + const int bits_used = progress * cpi->rc.this_frame_target + + (1.0 - progress) * cpi->rc.projected_frame_size; #endif + cpi->twopass.bits_left -= bits_used; cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0); - // Update bits left to the kf and gf groups to account for overshoot or - // undershoot on these frames. 
+ +#ifdef LONG_TERM_VBR_CORRECTION + if (cpi->common.frame_type != KEY_FRAME) { +#else if (cpi->common.frame_type == KEY_FRAME) { // For key frames kf_group_bits already had the target bits subtracted out. // So now update to the correct value based on the actual bits used. cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used; } else { +#endif cpi->twopass.kf_group_bits -= bits_used; cpi->twopass.gf_group_bits -= bits_used; cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 1aa250e8b..23a68729b 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -373,122 +373,6 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) { } } -static void set_rd_speed_thresholds(VP9_COMP *cpi) { - int i; - RD_OPT *const rd = &cpi->rd; - - // Set baseline threshold values - for (i = 0; i < MAX_MODES; ++i) - rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0; - - rd->thresh_mult[THR_NEARESTMV] = 0; - rd->thresh_mult[THR_NEARESTG] = 0; - rd->thresh_mult[THR_NEARESTA] = 0; - - rd->thresh_mult[THR_DC] += 1000; - - rd->thresh_mult[THR_NEWMV] += 1000; - rd->thresh_mult[THR_NEWA] += 1000; - rd->thresh_mult[THR_NEWG] += 1000; - - rd->thresh_mult[THR_NEARMV] += 1000; - rd->thresh_mult[THR_NEARA] += 1000; - rd->thresh_mult[THR_COMP_NEARESTLA] += 1000; - rd->thresh_mult[THR_COMP_NEARESTGA] += 1000; - - rd->thresh_mult[THR_TM] += 1000; - - rd->thresh_mult[THR_COMP_NEARLA] += 1500; - rd->thresh_mult[THR_COMP_NEWLA] += 2000; - rd->thresh_mult[THR_NEARG] += 1000; - rd->thresh_mult[THR_COMP_NEARGA] += 1500; - rd->thresh_mult[THR_COMP_NEWGA] += 2000; - - rd->thresh_mult[THR_ZEROMV] += 2000; - rd->thresh_mult[THR_ZEROG] += 2000; - rd->thresh_mult[THR_ZEROA] += 2000; - rd->thresh_mult[THR_COMP_ZEROLA] += 2500; - rd->thresh_mult[THR_COMP_ZEROGA] += 2500; - - rd->thresh_mult[THR_H_PRED] += 2000; - rd->thresh_mult[THR_V_PRED] += 2000; - rd->thresh_mult[THR_D45_PRED ] 
+= 2500; - rd->thresh_mult[THR_D135_PRED] += 2500; - rd->thresh_mult[THR_D117_PRED] += 2500; - rd->thresh_mult[THR_D153_PRED] += 2500; - rd->thresh_mult[THR_D207_PRED] += 2500; - rd->thresh_mult[THR_D63_PRED] += 2500; - - /* disable frame modes if flags not set */ - if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { - rd->thresh_mult[THR_NEWMV ] = INT_MAX; - rd->thresh_mult[THR_NEARESTMV] = INT_MAX; - rd->thresh_mult[THR_ZEROMV ] = INT_MAX; - rd->thresh_mult[THR_NEARMV ] = INT_MAX; - } - if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { - rd->thresh_mult[THR_NEARESTG ] = INT_MAX; - rd->thresh_mult[THR_ZEROG ] = INT_MAX; - rd->thresh_mult[THR_NEARG ] = INT_MAX; - rd->thresh_mult[THR_NEWG ] = INT_MAX; - } - if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { - rd->thresh_mult[THR_NEARESTA ] = INT_MAX; - rd->thresh_mult[THR_ZEROA ] = INT_MAX; - rd->thresh_mult[THR_NEARA ] = INT_MAX; - rd->thresh_mult[THR_NEWA ] = INT_MAX; - } - - if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != - (VP9_LAST_FLAG | VP9_ALT_FLAG)) { - rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; - rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; - rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; - rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; - } - if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != - (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { - rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; - rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; - rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; - rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; - } -} - -static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { - const SPEED_FEATURES *const sf = &cpi->sf; - RD_OPT *const rd = &cpi->rd; - int i; - - for (i = 0; i < MAX_REFS; ++i) - rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? 
-500 : 0; - - rd->thresh_mult_sub8x8[THR_LAST] += 2500; - rd->thresh_mult_sub8x8[THR_GOLD] += 2500; - rd->thresh_mult_sub8x8[THR_ALTR] += 2500; - rd->thresh_mult_sub8x8[THR_INTRA] += 2500; - rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500; - rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500; - - // Check for masked out split cases. - for (i = 0; i < MAX_REFS; i++) - if (sf->disable_split_mask & (1 << i)) - rd->thresh_mult_sub8x8[i] = INT_MAX; - - // disable mode test if frame flag is not set - if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) - rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX; - if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) - rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; - if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) - rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; - if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != - (VP9_LAST_FLAG | VP9_ALT_FLAG)) - rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; - if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != - (VP9_GOLD_FLAG | VP9_ALT_FLAG)) - rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; -} static void set_speed_features(VP9_COMP *cpi) { #if CONFIG_INTERNAL_STATS @@ -500,8 +384,8 @@ static void set_speed_features(VP9_COMP *cpi) { vp9_set_speed_features(cpi); // Set rd thresholds based on mode and speed setting - set_rd_speed_thresholds(cpi); - set_rd_speed_thresholds_sub8x8(cpi); + vp9_set_rd_speed_thresholds(cpi); + vp9_set_rd_speed_thresholds_sub8x8(cpi); cpi->mb.fwd_txm4x4 = vp9_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { @@ -567,7 +451,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vp9_setup_pc_tree(&cpi->common, &cpi->mb); } - static void update_frame_size(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; @@ -1860,8 +1743,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) - fprintf(f, "%10u %10d %10d %10d 
%10d %10d " - "%10"PRId64" %10"PRId64" %10d " + fprintf(f, "%10u %10d %10d %10d %10d" + "%10"PRId64" %10"PRId64" %10"PRId64" %10"PRId64" %10d " "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" "%6d %6d %5d %5d %5d " "%10"PRId64" %10.3lf" @@ -1870,6 +1753,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { cpi->rc.projected_frame_size, cpi->rc.projected_frame_size / cpi->common.MBs, (cpi->rc.projected_frame_size - cpi->rc.this_frame_target), + cpi->rc.vbr_bits_off_target, cpi->rc.total_target_vs_actual, (cpi->oxcf.starting_buffer_level - cpi->rc.bits_off_target), cpi->rc.total_actual_bits, cm->base_qindex, diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index b63d0da5e..b8d0ec40d 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -85,7 +85,7 @@ void vp9_rc_init_minq_luts() { gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33); afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55); - inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.55); + inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75); } } @@ -1033,6 +1033,7 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) { RATE_CONTROL *const rc = &cpi->rc; rc->this_frame_target = target; + // Target rate per SB64 (including partial SB64s. rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) / (cm->width * cm->height); diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index fbeca829f..820366119 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -27,7 +27,9 @@ extern "C" { typedef struct { // Rate targetting variables - int this_frame_target; + int base_frame_target; // A baseline frame target before adjustment + // for previous under or over shoot. + int this_frame_target; // Actual frame target after rc adjustment. 
int projected_frame_size; int sb64_target_rate; int last_q[3]; // Separate values for Intra/Inter/ARF-GF @@ -67,6 +69,7 @@ typedef struct { int64_t buffer_level; int64_t bits_off_target; + int64_t vbr_bits_off_target; int decimation_factor; int decimation_count; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 63c099d75..55ae721be 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -4374,3 +4374,120 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } + +void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) { + int i; + RD_OPT *const rd = &cpi->rd; + + // Set baseline threshold values + for (i = 0; i < MAX_MODES; ++i) + rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0; + + rd->thresh_mult[THR_NEARESTMV] = 0; + rd->thresh_mult[THR_NEARESTG] = 0; + rd->thresh_mult[THR_NEARESTA] = 0; + + rd->thresh_mult[THR_DC] += 1000; + + rd->thresh_mult[THR_NEWMV] += 1000; + rd->thresh_mult[THR_NEWA] += 1000; + rd->thresh_mult[THR_NEWG] += 1000; + + rd->thresh_mult[THR_NEARMV] += 1000; + rd->thresh_mult[THR_NEARA] += 1000; + rd->thresh_mult[THR_COMP_NEARESTLA] += 1000; + rd->thresh_mult[THR_COMP_NEARESTGA] += 1000; + + rd->thresh_mult[THR_TM] += 1000; + + rd->thresh_mult[THR_COMP_NEARLA] += 1500; + rd->thresh_mult[THR_COMP_NEWLA] += 2000; + rd->thresh_mult[THR_NEARG] += 1000; + rd->thresh_mult[THR_COMP_NEARGA] += 1500; + rd->thresh_mult[THR_COMP_NEWGA] += 2000; + + rd->thresh_mult[THR_ZEROMV] += 2000; + rd->thresh_mult[THR_ZEROG] += 2000; + rd->thresh_mult[THR_ZEROA] += 2000; + rd->thresh_mult[THR_COMP_ZEROLA] += 2500; + rd->thresh_mult[THR_COMP_ZEROGA] += 2500; + + rd->thresh_mult[THR_H_PRED] += 2000; + rd->thresh_mult[THR_V_PRED] += 2000; + rd->thresh_mult[THR_D45_PRED ] += 2500; + rd->thresh_mult[THR_D135_PRED] += 2500; + rd->thresh_mult[THR_D117_PRED] += 2500; + rd->thresh_mult[THR_D153_PRED] += 2500; + rd->thresh_mult[THR_D207_PRED] += 2500; + rd->thresh_mult[THR_D63_PRED] += 2500; + + /* disable 
frame modes if flags not set */ + if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { + rd->thresh_mult[THR_NEWMV ] = INT_MAX; + rd->thresh_mult[THR_NEARESTMV] = INT_MAX; + rd->thresh_mult[THR_ZEROMV ] = INT_MAX; + rd->thresh_mult[THR_NEARMV ] = INT_MAX; + } + if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { + rd->thresh_mult[THR_NEARESTG ] = INT_MAX; + rd->thresh_mult[THR_ZEROG ] = INT_MAX; + rd->thresh_mult[THR_NEARG ] = INT_MAX; + rd->thresh_mult[THR_NEWG ] = INT_MAX; + } + if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { + rd->thresh_mult[THR_NEARESTA ] = INT_MAX; + rd->thresh_mult[THR_ZEROA ] = INT_MAX; + rd->thresh_mult[THR_NEARA ] = INT_MAX; + rd->thresh_mult[THR_NEWA ] = INT_MAX; + } + + if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != + (VP9_LAST_FLAG | VP9_ALT_FLAG)) { + rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; + } + if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != + (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { + rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; + } +} + +void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { + const SPEED_FEATURES *const sf = &cpi->sf; + RD_OPT *const rd = &cpi->rd; + int i; + + for (i = 0; i < MAX_REFS; ++i) + rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0; + + rd->thresh_mult_sub8x8[THR_LAST] += 2500; + rd->thresh_mult_sub8x8[THR_GOLD] += 2500; + rd->thresh_mult_sub8x8[THR_ALTR] += 2500; + rd->thresh_mult_sub8x8[THR_INTRA] += 2500; + rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500; + rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500; + + // Check for masked out split cases. 
+ for (i = 0; i < MAX_REFS; i++) + if (sf->disable_split_mask & (1 << i)) + rd->thresh_mult_sub8x8[i] = INT_MAX; + + // disable mode test if frame flag is not set + if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) + rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX; + if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) + rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; + if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) + rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; + if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != + (VP9_LAST_FLAG | VP9_ALT_FLAG)) + rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; + if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != + (VP9_GOLD_FLAG | VP9_ALT_FLAG)) + rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; +} diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index a01dbd4d3..e48566499 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -83,6 +83,10 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16]); +void vp9_set_rd_speed_thresholds(VP9_COMP *cpi); + +void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index c98c9d415..a5694161c 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -27,8 +27,6 @@ #include "vpx_ports/vpx_timer.h" #include "vpx_scale/vpx_scale.h" -#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering - static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, @@ -122,8 +120,6 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1, } } -#if ALT_REF_MC_ENABLED - static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, uint8_t *arf_frame_buf, uint8_t *frame_ptr_buf, @@ -133,6 +129,8 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, int step_param; int sadpb = x->sadperbit16; int bestsme = INT_MAX; + 
int distortion; + unsigned int sse; MV best_ref_mv1 = {0, 0}; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ @@ -154,26 +152,19 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, step_param = cpi->sf.reduce_first_step_size + (cpi->oxcf.speed > 5 ? 1 : 0); step_param = MIN(step_param, cpi->sf.max_step_search_steps - 2); - /*cpi->sf.search_method == HEX*/ // Ignore mv costing by sending NULL pointer instead of cost arrays vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1, &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv); - // Try sub-pixel MC? - // if (bestsme > error_thresh && bestsme < INT_MAX) - { - int distortion; - unsigned int sse; - // Ignore mv costing by sending NULL pointer instead of cost array - bestsme = cpi->find_fractional_mv_step(x, ref_mv, - &best_ref_mv1, - cpi->common.allow_high_precision_mv, - x->errorperbit, - &cpi->fn_ptr[BLOCK_16X16], - 0, cpi->sf.subpel_iters_per_step, - NULL, NULL, - &distortion, &sse); - } + // Ignore mv costing by sending NULL pointer instead of cost array + bestsme = cpi->find_fractional_mv_step(x, ref_mv, + &best_ref_mv1, + cpi->common.allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[BLOCK_16X16], + 0, cpi->sf.subpel_iters_per_step, + NULL, NULL, + &distortion, &sse); // Restore input state x->plane[0].src = src; @@ -181,7 +172,6 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, return bestsme; } -#endif static void temporal_filter_iterate_c(VP9_COMP *cpi, int frame_count, @@ -215,8 +205,7 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, input_buffer[i] = mbd->plane[i].pre[0].buf; for (mb_row = 0; mb_row < mb_rows; mb_row++) { -#if ALT_REF_MC_ENABLED - // Source frames are extended to 16 pixels. This is different than + // Source frames are extended to 16 pixels. This is different than // L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS) // A 6/8 tap filter is used for motion search. 
This requires 2 pixels // before and 3 pixels after. So the largest Y mv on a border would @@ -230,7 +219,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND)); cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND); -#endif for (mb_col = 0; mb_col < mb_cols; mb_col++) { int i, j, k; @@ -239,13 +227,14 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, vpx_memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0])); vpx_memset(count, 0, 16 * 16 * 3 * sizeof(count[0])); -#if ALT_REF_MC_ENABLED cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND)); cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND); -#endif for (frame = 0; frame < frame_count; frame++) { + const int thresh_low = 10000; + const int thresh_high = 20000; + if (cpi->frames[frame] == NULL) continue; @@ -255,38 +244,31 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (frame == alt_ref_index) { filter_weight = 2; } else { - int err = 0; -#if ALT_REF_MC_ENABLED -#define THRESH_LOW 10000 -#define THRESH_HIGH 20000 - // Find best match in this frame by MC - err = temporal_filter_find_matching_mb_c - (cpi, - cpi->frames[alt_ref_index]->y_buffer + mb_y_offset, - cpi->frames[frame]->y_buffer + mb_y_offset, - cpi->frames[frame]->y_stride); -#endif + int err = temporal_filter_find_matching_mb_c(cpi, + cpi->frames[alt_ref_index]->y_buffer + mb_y_offset, + cpi->frames[frame]->y_buffer + mb_y_offset, + cpi->frames[frame]->y_stride); + // Assign higher weight to matching MB if it's error // score is lower. If not applying MC default behavior // is to weight all MBs equal. - filter_weight = err < THRESH_LOW - ? 2 : err < THRESH_HIGH ? 1 : 0; + filter_weight = err < thresh_low + ? 2 : err < thresh_high ? 
1 : 0; } if (filter_weight != 0) { // Construct the predictors - temporal_filter_predictors_mb_c - (mbd, - cpi->frames[frame]->y_buffer + mb_y_offset, - cpi->frames[frame]->u_buffer + mb_uv_offset, - cpi->frames[frame]->v_buffer + mb_uv_offset, - cpi->frames[frame]->y_stride, - mb_uv_height, - mbd->mi[0]->bmi[0].as_mv[0].as_mv.row, - mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, - predictor, scale, - mb_col * 16, mb_row * 16); + temporal_filter_predictors_mb_c(mbd, + cpi->frames[frame]->y_buffer + mb_y_offset, + cpi->frames[frame]->u_buffer + mb_uv_offset, + cpi->frames[frame]->v_buffer + mb_uv_offset, + cpi->frames[frame]->y_stride, + mb_uv_height, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, + predictor, scale, + mb_col * 16, mb_row * 16); // Apply the filter (YUV) vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, @@ -320,7 +302,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, // move to next pixel byte++; } - byte += stride - 16; } @@ -347,14 +328,11 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, // move to next pixel byte++; } - byte += stride - mb_uv_height; } - mb_y_offset += 16; mb_uv_offset += mb_uv_height; } - mb_y_offset += 16 * (f->y_stride - mb_cols); mb_uv_offset += mb_uv_height * (f->uv_stride - mb_cols); } @@ -366,18 +344,14 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { VP9_COMMON *const cm = &cpi->common; - int frame = 0; - int frames_to_blur_backward = 0; int frames_to_blur_forward = 0; int frames_to_blur = 0; int start_frame = 0; - int strength = cpi->active_arnr_strength; int blur_type = cpi->oxcf.arnr_type; int max_frames = cpi->active_arnr_frames; - const int num_frames_backward = distance; const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead) - (num_frames_backward + 1); diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 71867a938..a31a476f3 100644 --- 
a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -18,63 +18,34 @@ #include "vp9/encoder/vp9_variance.h" -void variance(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - int w, - int h, - unsigned int *sse, - int *sum) { +void variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { int i, j; - int diff; *sum = 0; *sse = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { - diff = src_ptr[j] - ref_ptr[j]; + const int diff = a[j] - b[j]; *sum += diff; *sse += diff * diff; } - src_ptr += source_stride; - ref_ptr += recon_stride; + a += a_stride; + b += b_stride; } } -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_first_pass - * - * INPUTS : uint8_t *src_ptr : Pointer to source block. - * uint32_t src_pixels_per_line : Stride of input block. - * uint32_t pixel_step : Offset between filter input - * samples (see notes). - * uint32_t output_height : Input block height. - * uint32_t output_width : Input block width. - * int32_t *vp9_filter : Array of 2 bi-linear filter - * taps. - * - * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement first-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step= - * stride). - * It defines the offset required to move from one input - * to the next. 
- * - ****************************************************************************/ +// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. Used to implement +// first-pass of 2-D separable filter. +// +// Produces int32_t output to retain precision for next pass. Two filter taps +// should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is +// applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It +// defines the offset required to move from one input to the next. static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, uint16_t *output_ptr, unsigned int src_pixels_per_line, @@ -99,38 +70,14 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, } } -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_second_pass - * - * INPUTS : int32_t *src_ptr : Pointer to source block. - * uint32_t src_pixels_per_line : Stride of input block. - * uint32_t pixel_step : Offset between filter input - * samples (see notes). - * uint32_t output_height : Input block height. - * uint32_t output_width : Input block width. - * int32_t *vp9_filter : Array of 2 bi-linear filter - * taps. - * - * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement second-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Requires 32-bit input as produced by - * filter_block2d_bil_first_pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step= - * stride). - * It defines the offset required to move from one input - * to the next. 
- * - ****************************************************************************/ +// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. Used to implement +// second-pass of 2-D separable filter. +// +// Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two +// filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the +// filter is applied horizontally (pixel_step=1) or vertically (pixel_step= +// stride). It defines the offset required to move from one input to the next. static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, uint8_t *output_ptr, unsigned int src_pixels_per_line, @@ -156,9 +103,8 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { unsigned int i, sum = 0; - for (i = 0; i < 256; i++) { - sum += (src_ptr[i] * src_ptr[i]); - } + for (i = 0; i < 256; i++) + sum += src_ptr[i] * src_ptr[i]; return sum; } @@ -183,12 +129,10 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); @@ -205,13 +149,11 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, 
temp3, 64 * 64); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); @@ -240,12 +182,10 @@ unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); @@ -262,13 +202,11 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); @@ -297,12 +235,10 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - 
uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); @@ -319,13 +255,11 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); @@ -354,12 +288,10 @@ unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); @@ -376,13 +308,11 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const 
uint8_t *second_pred) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); @@ -582,17 +512,12 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse) { uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + uint16_t fdata3[5 * 4]; + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); - // First filter 1d Horizontal var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 4, hfilter); - - // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse); @@ -607,18 +532,13 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, unsigned int *sse, const uint8_t *second_pred) { uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer - uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); + uint16_t fdata3[5 * 4]; + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const 
int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); - // First filter 1d Horizontal var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 4, hfilter); - - // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); @@ -631,17 +551,14 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[9 * 8]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); - return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -653,18 +570,17 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[9 * 8]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); 
vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); + return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -675,12 +591,10 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering + uint16_t fdata3[17 * 16]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 16, hfilter); @@ -699,11 +613,9 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, const uint8_t *second_pred) { uint16_t fdata3[17 * 16]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 16, hfilter); @@ -722,10 +634,8 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, unsigned int *sse) { uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering uint8_t temp2[68 * 64]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); @@ -742,13 +652,11 @@ unsigned int 
vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); @@ -764,12 +672,10 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); @@ -786,13 +692,11 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = 
BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); @@ -928,12 +832,10 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering + uint16_t fdata3[16 * 9]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); @@ -950,13 +852,11 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering + uint16_t fdata3[16 * 9]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); @@ -974,10 +874,8 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, unsigned int *sse) { uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); 
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); @@ -994,13 +892,11 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering + uint16_t fdata3[9 * 16]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); @@ -1016,12 +912,10 @@ unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering + uint16_t fdata3[8 * 5]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); @@ -1038,13 +932,11 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering + uint16_t fdata3[8 * 5]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 
* 4); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); @@ -1060,14 +952,12 @@ unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[5 * 8]; // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be // of this big? same issue appears in all other block size settings. uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); @@ -1084,13 +974,11 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[5 * 8]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); @@ -1106,9 +994,8 @@ void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { - int tmp; - tmp = pred[j] + ref[j]; - comp_pred[j] = (tmp + 1) >> 1; + const int tmp = pred[j] + 
ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); } comp_pred += width; pred += width; diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index c9e39a1a2..4c8be71cd 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -17,14 +17,10 @@ extern "C" { #endif -void variance(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - int w, - int h, - unsigned int *sse, - int *sum); +void variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, + unsigned int *sse, int *sum); typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, |