diff options
-rw-r--r-- | test/borders_test.cc | 1 | ||||
-rw-r--r-- | test/cpu_speed_test.cc | 1 | ||||
-rw-r--r-- | test/webm_video_source.h | 1 | ||||
-rw-r--r-- | vp9/decoder/vp9_dsubexp.c | 16 | ||||
-rw-r--r-- | vp9/encoder/vp9_aq_cyclicrefresh.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_block.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 17 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 86 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_if.c | 211 | ||||
-rw-r--r-- | vp9/encoder/vp9_onyx_int.h | 23 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 75 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.h | 10 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 160 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 10 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 216 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.c | 343 | ||||
-rw-r--r-- | vp9/encoder/vp9_variance.h | 12 | ||||
-rw-r--r-- | vp9/vp9_cx_iface.c | 64 | ||||
-rw-r--r-- | vpxenc.c | 4 |
21 files changed, 559 insertions, 707 deletions
diff --git a/test/borders_test.cc b/test/borders_test.cc index b30be4580..a2f5a1bdd 100644 --- a/test/borders_test.cc +++ b/test/borders_test.cc @@ -35,7 +35,6 @@ class BordersTest : public ::libvpx_test::EncoderTest, encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); - encoder->Control(VP8E_SET_ARNR_TYPE, 3); } } diff --git a/test/cpu_speed_test.cc b/test/cpu_speed_test.cc index be651b4fe..ca201bbc7 100644 --- a/test/cpu_speed_test.cc +++ b/test/cpu_speed_test.cc @@ -37,7 +37,6 @@ class CpuSpeedTest : public ::libvpx_test::EncoderTest, encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); - encoder->Control(VP8E_SET_ARNR_TYPE, 3); } } diff --git a/test/webm_video_source.h b/test/webm_video_source.h index dc76bb007..11d3d234d 100644 --- a/test/webm_video_source.h +++ b/test/webm_video_source.h @@ -37,6 +37,7 @@ class WebMVideoSource : public CompressedVideoSource { virtual ~WebMVideoSource() { if (vpx_ctx_->file != NULL) fclose(vpx_ctx_->file); + webm_free(webm_ctx_); delete vpx_ctx_; delete webm_ctx_; } diff --git a/vp9/decoder/vp9_dsubexp.c b/vp9/decoder/vp9_dsubexp.c index e67b37240..c22617edb 100644 --- a/vp9/decoder/vp9_dsubexp.c +++ b/vp9/decoder/vp9_dsubexp.c @@ -26,22 +26,6 @@ static int decode_uniform(vp9_reader *r) { return v < m ? v : (v << 1) - m + vp9_read_bit(r); } - -static int merge_index(int v, int n, int modulus) { - int max1 = (n - 1 - modulus / 2) / modulus + 1; - if (v < max1) { - v = v * modulus + modulus / 2; - } else { - int w; - v -= max1; - w = v; - v += (v + modulus - modulus / 2) / modulus; - while (v % modulus == modulus / 2 || - w != v - (v + modulus - modulus / 2) / modulus) v++; - } - return v; -} - static int inv_remap_prob(int v, int m) { static int inv_map_table[MAX_PROB - 1] = { 6, 19, 32, 45, 58, 71, 84, 97, 110, 123, 136, 149, 162, 175, 188, diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 323c10350..d1437d377 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -136,7 +136,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, const int xmis = MIN(cm->mi_cols - mi_col, bw); const int ymis = MIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; - const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd); + const int refresh_this_block = cpi->mb.in_static_area || + candidate_refresh_aq(cr, mbmi, bsize, use_rd); // Default is to not update the refresh map. int new_map_value = cr->map[block_index]; int x = 0; int y = 0; diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index cad124125..fcf2a0420 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -162,6 +162,8 @@ struct macroblock { // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; + int in_static_area; + int optimize; // indicate if it is in the rd search loop or encoding process diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f88ce2d1a..9956acc0b 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1273,6 +1273,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi, static int is_background(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, int mi_col) { + MACROBLOCK *x = &cpi->mb; uint8_t *src, *pre; int src_stride, pre_stride; @@ -1304,7 +1305,8 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile, threshold = (row8x8_remaining * col8x8_remaining) << 6; } - return (this_sad < 2 * threshold); + x->in_static_area = (this_sad < 2 * threshold); + return x->in_static_area; } static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) { @@ -2433,6 +2435,11 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile, set_offsets(cpi, tile, mi_row, mi_col, bsize); xd->mi[0]->mbmi.sb_type = bsize; + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { + if (xd->mi[0]->mbmi.segment_id && x->in_static_area) + x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); + } + if (!frame_is_intra_only(cm)) { vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize); @@ -2856,7 +2863,8 @@ static void nonrd_use_partition(VP9_COMP *cpi, static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + MACROBLOCK *x = &cpi->mb; + MACROBLOCKD *xd = &x->e_mbd; int mi_col; // Initialize the left context for the new SB row @@ -2874,8 +2882,9 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; BLOCK_SIZE bsize; - cpi->mb.source_variance = UINT_MAX; - vp9_zero(cpi->mb.pred_mv); + x->in_static_area = 0; + x->source_variance = UINT_MAX; + vp9_zero(x->pred_mv); // Set the partition type of the 64X64 block switch (cpi->sf.partition_search_type) { diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index e250c20f8..7a85a8e85 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -61,7 +61,7 @@ #define MIN_GF_INTERVAL 4 #endif -#define DISABLE_RC_LONG_TERM_MEM +#define LONG_TERM_VBR_CORRECTION static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { YV12_BUFFER_CONFIG temp = *a; @@ -336,7 +336,8 @@ static double simple_weight(const YV12_BUFFER_CONFIG *buf) { } // This function returns the maximum target rate per frame. -static int frame_max_bits(const RATE_CONTROL *rc, const VP9_CONFIG *oxcf) { +static int frame_max_bits(const RATE_CONTROL *rc, + const VP9EncoderConfig *oxcf) { int64_t max_bits = ((int64_t)rc->avg_frame_bandwidth * (int64_t)oxcf->two_pass_vbrmax_section) / 100; if (max_bits < 0) @@ -907,7 +908,7 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi, const FIRSTPASS_STATS *stats, int section_target_bandwidth) { const RATE_CONTROL *const rc = &cpi->rc; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; if (section_target_bandwidth <= 0) { return rc->worst_quality; // Highest value allowed @@ -932,7 +933,7 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi, } // Restriction on active max q for constrained quality mode. - if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) + if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY) q = MAX(q, oxcf->cq_level); return q; } @@ -942,7 +943,7 @@ extern void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_init_second_pass(VP9_COMP *cpi) { SVC *const svc = &cpi->svc; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; const int is_spatial_svc = (svc->number_spatial_layers > 1) && (svc->number_temporal_layers == 1); struct twopass_rc *const twopass = is_spatial_svc ? @@ -1035,6 +1036,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) { reset_fpf_position(twopass, start_pos); } + + // Reset the vbr bits off target counter + cpi->rc.vbr_bits_off_target = 0; } // This function gives an estimate of how badly we believe the prediction @@ -1403,7 +1407,7 @@ void define_fixed_arf_period(VP9_COMP *cpi) { // Analyse and define a gf/arf group. static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { RATE_CONTROL *const rc = &cpi->rc; - VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; struct twopass_rc *const twopass = &cpi->twopass; FIRSTPASS_STATS next_frame = { 0 }; const FIRSTPASS_STATS *start_pos; @@ -2194,6 +2198,23 @@ void vp9_rc_get_first_pass_params(VP9_COMP *cpi) { cpi->rc.frames_to_key = INT_MAX; } +// For VBR...adjustment to the frame target based on error from previous frames +void vbr_rate_correction(int * this_frame_target, + const int64_t vbr_bits_off_target) { + int max_delta = (*this_frame_target * 15) / 100; + + // vbr_bits_off_target > 0 means we have extra bits to spend + if (vbr_bits_off_target > 0) { + *this_frame_target += + (vbr_bits_off_target > max_delta) ? max_delta + : (int)vbr_bits_off_target; + } else { + *this_frame_target -= + (vbr_bits_off_target < -max_delta) ? max_delta + : (int)-vbr_bits_off_target; + } +} + void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -2221,8 +2242,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { return; if (cpi->refresh_alt_ref_frame) { + int modified_target = twopass->gf_bits; + rc->base_frame_target = twopass->gf_bits; cm->frame_type = INTER_FRAME; - vp9_rc_set_frame_target(cpi, twopass->gf_bits); +#ifdef LONG_TERM_VBR_CORRECTION + // Correction to rate target based on prior over or under shoot. + if (cpi->oxcf.rc_mode == RC_MODE_VBR) + vbr_rate_correction(&modified_target, rc->vbr_bits_off_target); +#endif + vp9_rc_set_frame_target(cpi, modified_target); return; } @@ -2233,7 +2261,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; } - if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { + if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) { twopass->active_worst_quality = cpi->oxcf.cq_level; } else if (cm->current_video_frame == 0 || (is_spatial_svc && lc->current_video_frame_in_layer == 0)) { @@ -2317,6 +2345,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { target = vp9_rc_clamp_iframe_target_size(cpi, rc->this_frame_target); else target = vp9_rc_clamp_pframe_target_size(cpi, rc->this_frame_target); + + rc->base_frame_target = target; +#ifdef LONG_TERM_VBR_CORRECTION + // Correction to rate target based on prior over or under shoot. + if (cpi->oxcf.rc_mode == RC_MODE_VBR) + vbr_rate_correction(&target, rc->vbr_bits_off_target); +#endif vp9_rc_set_frame_target(cpi, target); // Update the total stats remaining structure. @@ -2324,20 +2359,45 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { } void vp9_twopass_postencode_update(VP9_COMP *cpi) { -#ifdef DISABLE_RC_LONG_TERM_MEM - const uint64_t bits_used = cpi->rc.this_frame_target; + RATE_CONTROL *const rc = &cpi->rc; +#ifdef LONG_TERM_VBR_CORRECTION + // In this experimental mode, the VBR correction is done exclusively through + // rc->vbr_bits_off_target. Based on the sign of this value, a limited % + // adjustment is made to the target rate of subsequent frames, to try and + // push it back towards 0. This mode is less likely to suffer from + // extreme behaviour at the end of a clip or group of frames. + const int bits_used = rc->base_frame_target; + rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size; #else - const uint64_t bits_used = cpi->rc.projected_frame_size; + // In this mode, VBR correction is acheived by altering bits_left, + // kf_group_bits & gf_group_bits to reflect any deviation from the target + // rate in this frame. This alters the allocation of bits to the + // remaning frames in the group / clip. + // + // This method can give rise to unstable behaviour near the end of a clip + // or kf/gf group of frames where any accumulated error is corrected over an + // ever decreasing number of frames. Hence we change the balance of target + // vs. actual bitrate gradually as we progress towards the end of the + // sequence in order to mitigate this effect. + const double progress = + (double)(cpi->twopass.stats_in - cpi->twopass.stats_in_start) / + (cpi->twopass.stats_in_end - cpi->twopass.stats_in_start); + const int bits_used = progress * cpi->rc.this_frame_target + + (1.0 - progress) * cpi->rc.projected_frame_size; #endif + cpi->twopass.bits_left -= bits_used; cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0); - // Update bits left to the kf and gf groups to account for overshoot or - // undershoot on these frames. + +#ifdef LONG_TERM_VBR_CORRECTION + if (cpi->common.frame_type != KEY_FRAME) { +#else if (cpi->common.frame_type == KEY_FRAME) { // For key frames kf_group_bits already had the target bits subtracted out. // So now update to the correct value based on the actual bits used. cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used; } else { +#endif cpi->twopass.kf_group_bits -= bits_used; cpi->twopass.gf_group_bits -= bits_used; cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0); diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 0aba911b8..c94fff2d3 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -131,6 +131,24 @@ static void setup_inter_frame(VP9_COMMON *cm) { cm->fc = cm->frame_contexts[cm->frame_context_idx]; } +static void setup_frame(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + // Set up entropy context depending on frame type. The decoder mandates + // the use of the default context, index 0, for keyframes and inter + // frames where the error_resilient_mode or intra_only flag is set. For + // other inter-frames the encoder currently uses only two contexts; + // context 1 for ALTREF frames and context 0 for the others. + if (cm->frame_type == KEY_FRAME) { + setup_key_frame(cpi); + } else { + if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) + cm->frame_context_idx = cpi->refresh_alt_ref_frame; + setup_inter_frame(cm); + } +} + + + void vp9_initialize_enc() { static int init_done = 0; @@ -373,122 +391,6 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) { } } -static void set_rd_speed_thresholds(VP9_COMP *cpi) { - int i; - RD_OPT *const rd = &cpi->rd; - - // Set baseline threshold values - for (i = 0; i < MAX_MODES; ++i) - rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0; - - rd->thresh_mult[THR_NEARESTMV] = 0; - rd->thresh_mult[THR_NEARESTG] = 0; - rd->thresh_mult[THR_NEARESTA] = 0; - - rd->thresh_mult[THR_DC] += 1000; - - rd->thresh_mult[THR_NEWMV] += 1000; - rd->thresh_mult[THR_NEWA] += 1000; - rd->thresh_mult[THR_NEWG] += 1000; - - rd->thresh_mult[THR_NEARMV] += 1000; - rd->thresh_mult[THR_NEARA] += 1000; - rd->thresh_mult[THR_COMP_NEARESTLA] += 1000; - rd->thresh_mult[THR_COMP_NEARESTGA] += 1000; - - rd->thresh_mult[THR_TM] += 1000; - - rd->thresh_mult[THR_COMP_NEARLA] += 1500; - rd->thresh_mult[THR_COMP_NEWLA] += 2000; - rd->thresh_mult[THR_NEARG] += 1000; - rd->thresh_mult[THR_COMP_NEARGA] += 1500; - rd->thresh_mult[THR_COMP_NEWGA] += 2000; - - rd->thresh_mult[THR_ZEROMV] += 2000; - rd->thresh_mult[THR_ZEROG] += 2000; - rd->thresh_mult[THR_ZEROA] += 2000; - rd->thresh_mult[THR_COMP_ZEROLA] += 2500; - rd->thresh_mult[THR_COMP_ZEROGA] += 2500; - - rd->thresh_mult[THR_H_PRED] += 2000; - rd->thresh_mult[THR_V_PRED] += 2000; - rd->thresh_mult[THR_D45_PRED ] += 2500; - rd->thresh_mult[THR_D135_PRED] += 2500; - rd->thresh_mult[THR_D117_PRED] += 2500; - rd->thresh_mult[THR_D153_PRED] += 2500; - rd->thresh_mult[THR_D207_PRED] += 2500; - rd->thresh_mult[THR_D63_PRED] += 2500; - - /* disable frame modes if flags not set */ - if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { - rd->thresh_mult[THR_NEWMV ] = INT_MAX; - rd->thresh_mult[THR_NEARESTMV] = INT_MAX; - rd->thresh_mult[THR_ZEROMV ] = INT_MAX; - rd->thresh_mult[THR_NEARMV ] = INT_MAX; - } - if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { - rd->thresh_mult[THR_NEARESTG ] = INT_MAX; - rd->thresh_mult[THR_ZEROG ] = INT_MAX; - rd->thresh_mult[THR_NEARG ] = INT_MAX; - rd->thresh_mult[THR_NEWG ] = INT_MAX; - } - if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { - rd->thresh_mult[THR_NEARESTA ] = INT_MAX; - rd->thresh_mult[THR_ZEROA ] = INT_MAX; - rd->thresh_mult[THR_NEARA ] = INT_MAX; - rd->thresh_mult[THR_NEWA ] = INT_MAX; - } - - if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != - (VP9_LAST_FLAG | VP9_ALT_FLAG)) { - rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; - rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; - rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; - rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; - } - if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != - (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { - rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; - rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; - rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; - rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; - } -} - -static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { - const SPEED_FEATURES *const sf = &cpi->sf; - RD_OPT *const rd = &cpi->rd; - int i; - - for (i = 0; i < MAX_REFS; ++i) - rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0; - - rd->thresh_mult_sub8x8[THR_LAST] += 2500; - rd->thresh_mult_sub8x8[THR_GOLD] += 2500; - rd->thresh_mult_sub8x8[THR_ALTR] += 2500; - rd->thresh_mult_sub8x8[THR_INTRA] += 2500; - rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500; - rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500; - - // Check for masked out split cases. - for (i = 0; i < MAX_REFS; i++) - if (sf->disable_split_mask & (1 << i)) - rd->thresh_mult_sub8x8[i] = INT_MAX; - - // disable mode test if frame flag is not set - if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) - rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX; - if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) - rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; - if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) - rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; - if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != - (VP9_LAST_FLAG | VP9_ALT_FLAG)) - rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; - if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != - (VP9_GOLD_FLAG | VP9_ALT_FLAG)) - rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; -} static void set_speed_features(VP9_COMP *cpi) { #if CONFIG_INTERNAL_STATS @@ -500,8 +402,8 @@ static void set_speed_features(VP9_COMP *cpi) { vp9_set_speed_features(cpi); // Set rd thresholds based on mode and speed setting - set_rd_speed_thresholds(cpi); - set_rd_speed_thresholds_sub8x8(cpi); + vp9_set_rd_speed_thresholds(cpi); + vp9_set_rd_speed_thresholds_sub8x8(cpi); cpi->mb.fwd_txm4x4 = vp9_fdct4x4; if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) { @@ -511,7 +413,7 @@ static void set_speed_features(VP9_COMP *cpi) { static void alloc_raw_frame_buffers(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; - const VP9_CONFIG *oxcf = &cpi->oxcf; + const VP9EncoderConfig *oxcf = &cpi->oxcf; cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, cm->subsampling_x, cm->subsampling_y, @@ -567,7 +469,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { vp9_setup_pc_tree(&cpi->common, &cpi->mb); } - static void update_frame_size(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; @@ -633,7 +534,7 @@ static void set_tile_limits(VP9_COMP *cpi) { cm->log2_tile_rows = cpi->oxcf.tile_rows; } -static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { +static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; int i; @@ -654,7 +555,7 @@ static void init_config(struct VP9_COMP *cpi, VP9_CONFIG *oxcf) { cpi->svc.number_temporal_layers = oxcf->ts_number_layers; if ((cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + cpi->oxcf.rc_mode == RC_MODE_CBR) || (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.mode == TWO_PASS_SECOND_BEST)) { vp9_init_layer_context(cpi); @@ -693,7 +594,7 @@ static int get_pass(MODE mode) { return -1; } -void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { +void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -741,7 +642,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { cpi->encode_breakout = cpi->oxcf.encode_breakout; // local file playback mode == really big buffer - if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { + if (cpi->oxcf.rc_mode == RC_MODE_VBR) { cpi->oxcf.starting_buffer_level = 60000; cpi->oxcf.optimal_buffer_level = 60000; cpi->oxcf.maximum_buffer_size = 240000; @@ -785,8 +686,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { cm->display_width = cpi->oxcf.width; cm->display_height = cpi->oxcf.height; - cm->lf.sharpness_level = cpi->oxcf.sharpness; - if (cpi->initial_width) { // Increasing the size of the frame beyond the first seen frame, or some // otherwise signaled maximum size, is not supported. @@ -797,7 +696,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) { update_frame_size(cpi); if ((cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + cpi->oxcf.rc_mode == RC_MODE_CBR) || (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { vp9_update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth); @@ -865,7 +764,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { } -VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) { +VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { int i, j; VP9_COMP *const cpi = vpx_memalign(32, sizeof(VP9_COMP)); VP9_COMMON *const cm = cpi != NULL ? &cpi->common : NULL; @@ -1683,7 +1582,7 @@ static int recode_loop_test(const VP9_COMP *cpi, int q, int maxq, int minq) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; int force_recode = 0; // Special case trap if maximum allowed frame size exceeded. @@ -1701,7 +1600,7 @@ static int recode_loop_test(const VP9_COMP *cpi, if ((rc->projected_frame_size > high_limit && q < maxq) || (rc->projected_frame_size < low_limit && q > minq)) { force_recode = 1; - } else if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) { + } else if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY) { // Deal with frame undershoot and whether or not we are // below the automatically set cq level. if (q > oxcf->cq_level && @@ -1862,8 +1761,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); if (cpi->twopass.total_left_stats.coded_error != 0.0) - fprintf(f, "%10u %10d %10d %10d %10d %10d " - "%10"PRId64" %10"PRId64" %10d " + fprintf(f, "%10u %10d %10d %10d %10d" + "%10"PRId64" %10"PRId64" %10"PRId64" %10"PRId64" %10d " "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" "%6d %6d %5d %5d %5d " "%10"PRId64" %10.3lf" @@ -1872,6 +1771,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { cpi->rc.projected_frame_size, cpi->rc.projected_frame_size / cpi->common.MBs, (cpi->rc.projected_frame_size - cpi->rc.this_frame_target), + cpi->rc.vbr_bits_off_target, cpi->rc.total_target_vs_actual, (cpi->oxcf.starting_buffer_level - cpi->rc.bits_off_target), cpi->rc.total_actual_bits, cm->base_qindex, @@ -1916,20 +1816,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; vp9_clear_system_state(); vp9_set_quantizer(cm, q); - - // Set up entropy context depending on frame type. The decoder mandates - // the use of the default context, index 0, for keyframes and inter - // frames where the error_resilient_mode or intra_only flag is set. For - // other inter-frames the encoder currently uses only two contexts; - // context 1 for ALTREF frames and context 0 for the others. - if (cm->frame_type == KEY_FRAME) { - setup_key_frame(cpi); - } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) - cm->frame_context_idx = cpi->refresh_alt_ref_frame; - - setup_inter_frame(cm); - } + setup_frame(cpi); // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { @@ -1974,21 +1861,8 @@ static void encode_with_recode_loop(VP9_COMP *cpi, vp9_set_quantizer(cm, q); - if (loop_count == 0) { - // Set up entropy context depending on frame type. The decoder mandates - // the use of the default context, index 0, for keyframes and inter - // frames where the error_resilient_mode or intra_only flag is set. For - // other inter-frames the encoder currently uses only two contexts; - // context 1 for ALTREF frames and context 0 for the others. - if (cm->frame_type == KEY_FRAME) { - setup_key_frame(cpi); - } else { - if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc) - cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame; - - setup_inter_frame(cm); - } - } + if (loop_count == 0) + setup_frame(cpi); // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. @@ -2023,7 +1897,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, frame_over_shoot_limit = 1; } - if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { + if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) { loop = 0; } else { if ((cm->frame_type == KEY_FRAME) && @@ -2121,7 +1995,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, // This should only trigger where there is very substantial // undershoot on a frame and the auto cq level is above // the user passsed in value. - if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && + if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY && q < q_low) { q_low = q; } @@ -2280,7 +2154,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // Set various flags etc to special state if it is a key frame. if (frame_is_intra_only(cm)) { - setup_key_frame(cpi); // Reset the loop filter deltas and segmentation map. vp9_reset_segment_features(&cm->seg); @@ -2320,7 +2193,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // For 1 pass CBR, check if we are dropping this frame. // Never drop on key frame. if (cpi->pass == 0 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && + cpi->oxcf.rc_mode == RC_MODE_CBR && cm->frame_type != KEY_FRAME) { if (vp9_rc_drop_frame(cpi)) { vp9_rc_postencode_update_drop_frame(cpi); @@ -2517,7 +2390,7 @@ static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if (cpi->oxcf.rc_mode == RC_MODE_CBR) { vp9_rc_get_one_pass_cbr_params(cpi); } else { vp9_rc_get_one_pass_vbr_params(cpi); @@ -2808,7 +2681,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } if (cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + cpi->oxcf.rc_mode == RC_MODE_CBR) { vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } @@ -2841,7 +2714,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (cpi->pass == 2 && cm->current_video_frame == 0 && cpi->oxcf.allow_spatial_resampling && - cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { + cpi->oxcf.rc_mode == RC_MODE_VBR) { // Internal scaling is triggered on the first frame. vp9_set_size_literal(cpi, cpi->oxcf.scaled_frame_width, cpi->oxcf.scaled_frame_height); @@ -2900,7 +2773,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Save layer specific state. if ((cpi->svc.number_temporal_layers > 1 && - cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) || + cpi->oxcf.rc_mode == RC_MODE_CBR) || (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) { vp9_save_layer_context(cpi); } diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 7cad0cc04..4e78ba9a5 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -132,11 +132,11 @@ typedef enum { } VPX_SCALING; typedef enum { - USAGE_LOCAL_FILE_PLAYBACK = 0, - USAGE_STREAM_FROM_SERVER = 1, - USAGE_CONSTRAINED_QUALITY = 2, - USAGE_CONSTANT_QUALITY = 3, -} END_USAGE; + RC_MODE_VBR = 0, + RC_MODE_CBR = 1, + RC_MODE_CONSTRAINED_QUALITY = 2, + RC_MODE_CONSTANT_QUALITY = 3, +} RC_MODE; typedef enum { // Good Quality Fast Encoding. The encoder balances quality with the @@ -186,7 +186,7 @@ typedef enum { } AQ_MODE; -typedef struct VP9_CONFIG { +typedef struct VP9EncoderConfig { BITSTREAM_PROFILE profile; BIT_DEPTH bit_depth; int width; // width of data passed to the compressor @@ -210,7 +210,7 @@ typedef struct VP9_CONFIG { // ---------------------------------------------------------------- // DATARATE CONTROL OPTIONS - END_USAGE end_usage; // vbr or cbr + RC_MODE rc_mode; // vbr, cbr, constrained quality or constant quality // buffer targeting aggressiveness int under_shoot_pct; @@ -276,7 +276,6 @@ typedef struct VP9_CONFIG { int arnr_max_frames; int arnr_strength; - int arnr_type; int tile_columns; int tile_rows; @@ -285,7 +284,7 @@ typedef struct VP9_CONFIG { struct vpx_codec_pkt_list *output_pkt_list; vp8e_tuning tuning; -} VP9_CONFIG; +} VP9EncoderConfig; static INLINE int is_best_mode(MODE mode) { return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST; @@ -321,7 +320,7 @@ typedef struct VP9_COMP { QUANTS quants; MACROBLOCK mb; VP9_COMMON common; - VP9_CONFIG oxcf; + VP9EncoderConfig oxcf; struct lookahead_ctx *lookahead; struct lookahead_entry *source; #if CONFIG_MULTIPLE_ARF @@ -521,10 +520,10 @@ typedef struct VP9_COMP { void vp9_initialize_enc(); -struct VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf); +struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf); void vp9_remove_compressor(VP9_COMP *cpi); -void vp9_change_config(VP9_COMP *cpi, const VP9_CONFIG *oxcf); +void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf); // receive a frames worth of data. caller can assume that a copy of this // frame is made and not just a copy of the pointer.. diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index ae7713570..2e35e5fbc 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -85,7 +85,7 @@ void vp9_rc_init_minq_luts() { gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50); afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33); afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55); - inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.55); + inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75); } } @@ -134,7 +134,7 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { const RATE_CONTROL *rc = &cpi->rc; - const VP9_CONFIG *oxcf = &cpi->oxcf; + const VP9EncoderConfig *oxcf = &cpi->oxcf; if (oxcf->rc_max_intra_bitrate_pct) { const int max_rate = rc->avg_frame_bandwidth * oxcf->rc_max_intra_bitrate_pct / 100; @@ -167,7 +167,7 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { // Update the buffer level: leaky bucket model. static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { const VP9_COMMON *const cm = &cpi->common; - const VP9_CONFIG *oxcf = &cpi->oxcf; + const VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; // Non-viewable frames are a special case and are treated as pure overhead. @@ -181,13 +181,13 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size); rc->buffer_level = rc->bits_off_target; - if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if (cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR) { update_layer_buffer_level(&cpi->svc, encoded_frame_size); } } -void vp9_rc_init(const VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc) { - if (pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { +void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { + if (pass == 0 && oxcf->rc_mode == RC_MODE_CBR) { rc->avg_frame_qindex[0] = oxcf->worst_allowed_q; rc->avg_frame_qindex[1] = oxcf->worst_allowed_q; rc->avg_frame_qindex[2] = oxcf->worst_allowed_q; @@ -237,7 +237,7 @@ void vp9_rc_init(const VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc) { } int vp9_rc_drop_frame(VP9_COMP *cpi) { - const VP9_CONFIG *oxcf = &cpi->oxcf; + const VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; if (!oxcf->drop_frames_water_mark) { @@ -280,7 +280,7 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && !cpi->rc.is_src_frame_alt_ref && - !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) + !(cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR)) return cpi->rc.gf_rate_correction_factor; else return cpi->rc.rate_correction_factor; @@ -293,7 +293,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && !cpi->rc.is_src_frame_alt_ref && - !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) + !(cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR)) cpi->rc.gf_rate_correction_factor = factor; else cpi->rc.rate_correction_factor = factor; @@ -444,7 +444,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // ambient Q (at buffer = optimal level) to worst_quality level // (at buffer = critical level). const VP9_COMMON *const cm = &cpi->common; - const VP9_CONFIG *oxcf = &cpi->oxcf; + const VP9EncoderConfig *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; // Buffer level below which we push active_worst to worst_quality. int64_t critical_level = oxcf->optimal_buffer_level >> 2; @@ -612,7 +612,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, int *top_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; const int cq_level = oxcf->cq_level; int active_best_quality; int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi); @@ -671,7 +671,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, q = rc->avg_frame_qindex[KEY_FRAME]; } // For constrained quality dont allow Q less than the cq level - if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) { + if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) { if (q < cq_level) q = cq_level; if (rc->frames_since_key > 1) { @@ -688,7 +688,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, // Constrained quality use slightly lower active best. active_best_quality = active_best_quality * 15 / 16; - } else if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { + } else if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) { if (!cpi->refresh_alt_ref_frame) { active_best_quality = cq_level; } else { @@ -708,7 +708,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, gf_low_motion_minq, gf_high_motion_minq); } } else { - if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { + if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) { active_best_quality = cq_level; } else { // Use the lower of active_worst_quality and recent/average Q. @@ -718,7 +718,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]]; // For the constrained quality mode we don't want // q to fall below the cq level. - if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) && + if ((oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) && (active_best_quality < cq_level)) { active_best_quality = cq_level; } @@ -755,7 +755,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, } #endif - if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { + if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) { q = active_best_quality; // Special case code to try and match quality with forced key frames } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) { @@ -774,7 +774,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, #if CONFIG_MULTIPLE_ARF // Force the quantizer determined by the coding order pattern. if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) && - cpi->oxcf.end_usage != USAGE_CONSTANT_QUALITY) { + cpi->oxcf.rc_mode != RC_MODE_CONSTANT_QUALITY) { double new_q; double current_q = vp9_convert_qindex_to_q(active_worst_quality); int level = cpi->this_frame_weight; @@ -801,7 +801,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *top_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; const int cq_level = oxcf->cq_level; int active_best_quality; int active_worst_quality = cpi->twopass.active_worst_quality; @@ -862,7 +862,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, q = active_worst_quality; } // For constrained quality dont allow Q less than the cq level - if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) { + if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) { if (q < cq_level) q = cq_level; if (rc->frames_since_key > 1) { @@ -879,7 +879,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, // Constrained quality use slightly lower active best. active_best_quality = active_best_quality * 15 / 16; - } else if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { + } else if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) { if (!cpi->refresh_alt_ref_frame) { active_best_quality = cq_level; } else { @@ -899,14 +899,14 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, gf_low_motion_minq, gf_high_motion_minq); } } else { - if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { + if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) { active_best_quality = cq_level; } else { active_best_quality = inter_minq[active_worst_quality]; // For the constrained quality mode we don't want // q to fall below the cq level. - if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) && + if ((oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) && (active_best_quality < cq_level)) { active_best_quality = cq_level; } @@ -932,7 +932,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, active_worst_quality, 2.0); } else if (!rc->is_src_frame_alt_ref && - (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) && + (oxcf->rc_mode != RC_MODE_CBR) && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, active_worst_quality, 1.75); @@ -942,7 +942,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, } #endif - if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { + if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) { q = active_best_quality; // Special case code to try and match quality with forced key frames. } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) { @@ -961,7 +961,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, #if CONFIG_MULTIPLE_ARF // Force the quantizer determined by the coding order pattern. if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) && - cpi->oxcf.end_usage != USAGE_CONSTANT_QUALITY) { + cpi->oxcf.rc_mode != RC_MODE_CONSTANT_QUALITY) { double new_q; double current_q = vp9_convert_qindex_to_q(active_worst_quality); int level = cpi->this_frame_weight; @@ -987,7 +987,7 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index, int *top_index) { int q; if (cpi->pass == 0) { - if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) + if (cpi->oxcf.rc_mode == RC_MODE_CBR) q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index); else q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index); @@ -1015,7 +1015,7 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, int frame_target, int *frame_under_shoot_limit, int *frame_over_shoot_limit) { - if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) { + if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) { *frame_under_shoot_limit = 0; *frame_over_shoot_limit = INT_MAX; } else { @@ -1033,6 +1033,7 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) { RATE_CONTROL *const rc = &cpi->rc; rc->this_frame_target = target; + // Target rate per SB64 (including partial SB64s. rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) / (cm->width * cm->height); @@ -1079,7 +1080,7 @@ static void update_golden_frame_stats(VP9_COMP *cpi) { void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { const VP9_COMMON *const cm = &cpi->common; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; const int qindex = cm->base_qindex; @@ -1089,7 +1090,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { // Post encode loop adjustment of Q prediction. vp9_rc_update_rate_correction_factors( cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF || - oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0); + oxcf->rc_mode == RC_MODE_CBR) ? 2 : 0); // Keep a record of last Q and ambient average Q. if (cm->frame_type == KEY_FRAME) { @@ -1098,7 +1099,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2); } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) && - !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) { + !(cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) { rc->last_q[2] = qindex; rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2); @@ -1238,7 +1239,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { } static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { - const VP9_CONFIG *oxcf = &cpi->oxcf; + const VP9EncoderConfig *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; const SVC *const svc = &cpi->svc; const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level; @@ -1246,7 +1247,7 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target = rc->avg_frame_bandwidth; if (svc->number_temporal_layers > 1 && - oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + oxcf->rc_mode == RC_MODE_CBR) { // Note that for layers, avg_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). @@ -1269,7 +1270,7 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const RATE_CONTROL *rc = &cpi->rc; - const VP9_CONFIG *oxcf = &cpi->oxcf; + const VP9EncoderConfig *oxcf = &cpi->oxcf; const SVC *const svc = &cpi->svc; int target; if (cpi->common.current_video_frame == 0) { @@ -1279,7 +1280,7 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { int kf_boost = 32; double framerate = oxcf->framerate; if (svc->number_temporal_layers > 1 && - oxcf->end_usage == USAGE_STREAM_FROM_SERVER) { + oxcf->rc_mode == RC_MODE_CBR) { // Use the layer framerate for temporal layers CBR mode. const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id]; framerate = lc->framerate; @@ -1304,12 +1305,12 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { cpi->key_frame_frequency == 0))) { cm->frame_type = KEY_FRAME; rc->source_alt_ref_active = 0; - if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) { target = calc_iframe_target_size_one_pass_cbr(cpi); } } else { cm->frame_type = INTER_FRAME; - if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) { target = calc_pframe_target_size_one_pass_cbr(cpi); } } @@ -1388,7 +1389,7 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, void vp9_rc_update_framerate(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; int vbr_max_bits; diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h index 036470001..820366119 100644 --- a/vp9/encoder/vp9_ratectrl.h +++ b/vp9/encoder/vp9_ratectrl.h @@ -27,7 +27,9 @@ extern "C" { typedef struct { // Rate targetting variables - int this_frame_target; + int base_frame_target; // A baseline frame target before adjustment + // for previous under or over shoot. + int this_frame_target; // Actual frame target after rc adjustment. int projected_frame_size; int sb64_target_rate; int last_q[3]; // Separate values for Intra/Inter/ARF-GF @@ -67,6 +69,7 @@ typedef struct { int64_t buffer_level; int64_t bits_off_target; + int64_t vbr_bits_off_target; int decimation_factor; int decimation_count; @@ -87,9 +90,10 @@ typedef struct { } RATE_CONTROL; struct VP9_COMP; -struct VP9_CONFIG; +struct VP9EncoderConfig; -void vp9_rc_init(const struct VP9_CONFIG *oxcf, int pass, RATE_CONTROL *rc); +void vp9_rc_init(const struct VP9EncoderConfig *oxcf, int pass, + RATE_CONTROL *rc); double vp9_convert_qindex_to_q(int qindex); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 63c099d75..f7f2ec6c8 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -243,9 +243,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { cpi->mb.sadperbit4 = sad_per_bit4lut[qindex]; } -static void set_block_thresholds(VP9_COMP *cpi) { - const VP9_COMMON *const cm = &cpi->common; - RD_OPT *const rd = &cpi->rd; +static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) { int i, bsize, segment_id; for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { @@ -280,20 +278,21 @@ static void set_block_thresholds(VP9_COMP *cpi) { void vp9_initialize_rd_consts(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->mb; + RD_OPT *const rd = &cpi->rd; int i; vp9_clear_system_state(); - cpi->rd.RDDIV = RDDIV_BITS; // in bits (to multiply D by 128) - cpi->rd.RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); + rd->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128) + rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); - x->errorperbit = cpi->rd.RDMULT / RD_MULT_EPB_RATIO; + x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO; x->errorperbit += (x->errorperbit == 0); x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && cm->frame_type != KEY_FRAME) ? 0 : 1; - set_block_thresholds(cpi); + set_block_thresholds(cm, rd); if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) { fill_token_costs(x->token_costs, cm->fc.coef_probs); @@ -2178,12 +2177,12 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x, x->pred_mv_sad[ref_frame] = best_sad; } -static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id, +static void estimate_ref_frame_costs(const VP9_COMMON *cm, + const MACROBLOCKD *xd, + int segment_id, unsigned int *ref_costs_single, unsigned int *ref_costs_comp, vp9_prob *comp_mode_p) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); if (seg_ref_active) { @@ -2348,7 +2347,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; - VP9_COMMON *cm = &cpi->common; + const VP9_COMMON *cm = &cpi->common; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}}; int bestsme = INT_MAX; @@ -2386,7 +2385,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, // Work out the size of the first step in the mv step search. // 0 here is maximum length first step. 1 is MAX >> 1 etc. - if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) { + if (cpi->sf.auto_mv_step_size && cm->show_frame) { // Take wtd average of the step_params based on the last frame's // max mv magnitude and that based on the best ref mvs of the current // block for the given reference. @@ -2397,7 +2396,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 && - cpi->common.show_frame) { + cm->show_frame) { int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize), b_width_log2(bsize))); step_param = MAX(step_param, boffset); @@ -2412,7 +2411,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (tlevel < 5) step_param += 2; - for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) { + for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { x->pred_mv[ref].as_int = 0; tmp_mv->as_int = INVALID_MV; @@ -2496,7 +2495,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) + if (cpi->sf.adaptive_motion_search && cm->show_frame) x->pred_mv[ref].as_int = tmp_mv->as_int; if (scaled_ref_frame) { @@ -3104,13 +3103,13 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, // combination that wins out. static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize, int best_mode_index) { - if (cpi->sf.adaptive_rd_thresh) { + if (cpi->sf.adaptive_rd_thresh > 0) { const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES; - int mode_index; - for (mode_index = 0; mode_index < top_mode; ++mode_index) { - int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode_index]; + int mode; + for (mode = 0; mode < top_mode; ++mode) { + int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode]; - if (mode_index == best_mode_index) { + if (mode == best_mode_index) { *fact -= (*fact >> 3); } else { *fact = MIN(*fact + RD_THRESH_INC, @@ -3178,7 +3177,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; - estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, + estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); for (i = 0; i < REFERENCE_MODES; ++i) @@ -3792,7 +3791,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, seg_mvs[i][j].as_int = INVALID_MV; } - estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp, + estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); for (i = 0; i < REFERENCE_MODES; ++i) @@ -4374,3 +4373,120 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } + +void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) { + int i; + RD_OPT *const rd = &cpi->rd; + + // Set baseline threshold values + for (i = 0; i < MAX_MODES; ++i) + rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0; + + rd->thresh_mult[THR_NEARESTMV] = 0; + rd->thresh_mult[THR_NEARESTG] = 0; + rd->thresh_mult[THR_NEARESTA] = 0; + + rd->thresh_mult[THR_DC] += 1000; + + rd->thresh_mult[THR_NEWMV] += 1000; + rd->thresh_mult[THR_NEWA] += 1000; + rd->thresh_mult[THR_NEWG] += 1000; + + rd->thresh_mult[THR_NEARMV] += 1000; + rd->thresh_mult[THR_NEARA] += 1000; + rd->thresh_mult[THR_COMP_NEARESTLA] += 1000; + rd->thresh_mult[THR_COMP_NEARESTGA] += 1000; + + rd->thresh_mult[THR_TM] += 1000; + + rd->thresh_mult[THR_COMP_NEARLA] += 1500; + rd->thresh_mult[THR_COMP_NEWLA] += 2000; + rd->thresh_mult[THR_NEARG] += 1000; + rd->thresh_mult[THR_COMP_NEARGA] += 1500; + rd->thresh_mult[THR_COMP_NEWGA] += 2000; + + rd->thresh_mult[THR_ZEROMV] += 2000; + rd->thresh_mult[THR_ZEROG] += 2000; + rd->thresh_mult[THR_ZEROA] += 2000; + rd->thresh_mult[THR_COMP_ZEROLA] += 2500; + rd->thresh_mult[THR_COMP_ZEROGA] += 2500; + + rd->thresh_mult[THR_H_PRED] += 2000; + rd->thresh_mult[THR_V_PRED] += 2000; + rd->thresh_mult[THR_D45_PRED ] += 2500; + rd->thresh_mult[THR_D135_PRED] += 2500; + rd->thresh_mult[THR_D117_PRED] += 2500; + rd->thresh_mult[THR_D153_PRED] += 2500; + rd->thresh_mult[THR_D207_PRED] += 2500; + rd->thresh_mult[THR_D63_PRED] += 2500; + + /* disable frame modes if flags not set */ + if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) { + rd->thresh_mult[THR_NEWMV ] = INT_MAX; + rd->thresh_mult[THR_NEARESTMV] = INT_MAX; + rd->thresh_mult[THR_ZEROMV ] = INT_MAX; + rd->thresh_mult[THR_NEARMV ] = INT_MAX; + } + if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) { + rd->thresh_mult[THR_NEARESTG ] = INT_MAX; + rd->thresh_mult[THR_ZEROG ] = INT_MAX; + rd->thresh_mult[THR_NEARG ] = INT_MAX; + rd->thresh_mult[THR_NEWG ] = INT_MAX; + } + if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) { + rd->thresh_mult[THR_NEARESTA ] = INT_MAX; + rd->thresh_mult[THR_ZEROA ] = INT_MAX; + rd->thresh_mult[THR_NEARA ] = INT_MAX; + rd->thresh_mult[THR_NEWA ] = INT_MAX; + } + + if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != + (VP9_LAST_FLAG | VP9_ALT_FLAG)) { + rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX; + } + if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != + (VP9_GOLD_FLAG | VP9_ALT_FLAG)) { + rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX; + rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX; + rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX; + } +} + +void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { + const SPEED_FEATURES *const sf = &cpi->sf; + RD_OPT *const rd = &cpi->rd; + int i; + + for (i = 0; i < MAX_REFS; ++i) + rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0; + + rd->thresh_mult_sub8x8[THR_LAST] += 2500; + rd->thresh_mult_sub8x8[THR_GOLD] += 2500; + rd->thresh_mult_sub8x8[THR_ALTR] += 2500; + rd->thresh_mult_sub8x8[THR_INTRA] += 2500; + rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500; + rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500; + + // Check for masked out split cases. + for (i = 0; i < MAX_REFS; i++) + if (sf->disable_split_mask & (1 << i)) + rd->thresh_mult_sub8x8[i] = INT_MAX; + + // disable mode test if frame flag is not set + if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) + rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX; + if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) + rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX; + if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) + rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX; + if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) != + (VP9_LAST_FLAG | VP9_ALT_FLAG)) + rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX; + if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) != + (VP9_GOLD_FLAG | VP9_ALT_FLAG)) + rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX; +} diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index a01dbd4d3..e48566499 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -83,6 +83,10 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16]); +void vp9_set_rd_speed_thresholds(VP9_COMP *cpi); + +void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index f79ded5f9..bfde37065 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -80,9 +80,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, } if (speed >= 2) { - sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD - : USE_LARGESTALL; - if (MIN(cm->width, cm->height) >= 720) sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; @@ -104,6 +101,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, } if (speed >= 3) { + sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD + : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) sf->disable_split_mask = DISABLE_ALL_SPLIT; else @@ -288,7 +287,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf, void vp9_set_speed_features(VP9_COMP *cpi) { SPEED_FEATURES *const sf = &cpi->sf; VP9_COMMON *const cm = &cpi->common; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; int i; // best quality defaults diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 3c4e3e064..e408200b4 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -15,7 +15,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; int layer; int layer_end; @@ -65,7 +65,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { void vp9_update_layer_context_change_config(VP9_COMP *const cpi, const int target_bandwidth) { SVC *const svc = &cpi->svc; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; const RATE_CONTROL *const rc = &cpi->rc; int layer; int layer_end; @@ -118,7 +118,7 @@ static LAYER_CONTEXT *get_layer_context(SVC *svc) { void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = get_layer_context(svc); RATE_CONTROL *const lrc = &lc->rc; const int layer = svc->temporal_layer_id; @@ -141,7 +141,7 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { } void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); RATE_CONTROL *const lrc = &lc->rc; @@ -187,7 +187,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { } void vp9_save_layer_context(VP9_COMP *const cpi) { - const VP9_CONFIG *const oxcf = &cpi->oxcf; + const VP9EncoderConfig *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = get_layer_context(&cpi->svc); lc->rc = cpi->rc; diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index c98c9d415..398488866 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -27,8 +27,6 @@ #include "vpx_ports/vpx_timer.h" #include "vpx_scale/vpx_scale.h" -#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering - static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, @@ -122,8 +120,6 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1, } } -#if ALT_REF_MC_ENABLED - static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, uint8_t *arf_frame_buf, uint8_t *frame_ptr_buf, @@ -133,6 +129,8 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, int step_param; int sadpb = x->sadperbit16; int bestsme = INT_MAX; + int distortion; + unsigned int sse; MV best_ref_mv1 = {0, 0}; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ @@ -154,26 +152,19 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, step_param = cpi->sf.reduce_first_step_size + (cpi->oxcf.speed > 5 ? 1 : 0); step_param = MIN(step_param, cpi->sf.max_step_search_steps - 2); - /*cpi->sf.search_method == HEX*/ // Ignore mv costing by sending NULL pointer instead of cost arrays vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1, &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv); - // Try sub-pixel MC? - // if (bestsme > error_thresh && bestsme < INT_MAX) - { - int distortion; - unsigned int sse; - // Ignore mv costing by sending NULL pointer instead of cost array - bestsme = cpi->find_fractional_mv_step(x, ref_mv, - &best_ref_mv1, - cpi->common.allow_high_precision_mv, - x->errorperbit, - &cpi->fn_ptr[BLOCK_16X16], - 0, cpi->sf.subpel_iters_per_step, - NULL, NULL, - &distortion, &sse); - } + // Ignore mv costing by sending NULL pointer instead of cost array + bestsme = cpi->find_fractional_mv_step(x, ref_mv, + &best_ref_mv1, + cpi->common.allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[BLOCK_16X16], + 0, cpi->sf.subpel_iters_per_step, + NULL, NULL, + &distortion, &sse); // Restore input state x->plane[0].src = src; @@ -181,7 +172,6 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, return bestsme; } -#endif static void temporal_filter_iterate_c(VP9_COMP *cpi, int frame_count, @@ -215,8 +205,7 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, input_buffer[i] = mbd->plane[i].pre[0].buf; for (mb_row = 0; mb_row < mb_rows; mb_row++) { -#if ALT_REF_MC_ENABLED - // Source frames are extended to 16 pixels. This is different than + // Source frames are extended to 16 pixels. This is different than // L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS) // A 6/8 tap filter is used for motion search. This requires 2 pixels // before and 3 pixels after. So the largest Y mv on a border would @@ -230,7 +219,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND)); cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND); -#endif for (mb_col = 0; mb_col < mb_cols; mb_col++) { int i, j, k; @@ -239,13 +227,14 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, vpx_memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0])); vpx_memset(count, 0, 16 * 16 * 3 * sizeof(count[0])); -#if ALT_REF_MC_ENABLED cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND)); cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND); -#endif for (frame = 0; frame < frame_count; frame++) { + const int thresh_low = 10000; + const int thresh_high = 20000; + if (cpi->frames[frame] == NULL) continue; @@ -255,38 +244,31 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, if (frame == alt_ref_index) { filter_weight = 2; } else { - int err = 0; -#if ALT_REF_MC_ENABLED -#define THRESH_LOW 10000 -#define THRESH_HIGH 20000 - // Find best match in this frame by MC - err = temporal_filter_find_matching_mb_c - (cpi, - cpi->frames[alt_ref_index]->y_buffer + mb_y_offset, - cpi->frames[frame]->y_buffer + mb_y_offset, - cpi->frames[frame]->y_stride); -#endif + int err = temporal_filter_find_matching_mb_c(cpi, + cpi->frames[alt_ref_index]->y_buffer + mb_y_offset, + cpi->frames[frame]->y_buffer + mb_y_offset, + cpi->frames[frame]->y_stride); + // Assign higher weight to matching MB if it's error // score is lower. If not applying MC default behavior // is to weight all MBs equal. - filter_weight = err < THRESH_LOW - ? 2 : err < THRESH_HIGH ? 1 : 0; + filter_weight = err < thresh_low + ? 2 : err < thresh_high ? 1 : 0; } if (filter_weight != 0) { // Construct the predictors - temporal_filter_predictors_mb_c - (mbd, - cpi->frames[frame]->y_buffer + mb_y_offset, - cpi->frames[frame]->u_buffer + mb_uv_offset, - cpi->frames[frame]->v_buffer + mb_uv_offset, - cpi->frames[frame]->y_stride, - mb_uv_height, - mbd->mi[0]->bmi[0].as_mv[0].as_mv.row, - mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, - predictor, scale, - mb_col * 16, mb_row * 16); + temporal_filter_predictors_mb_c(mbd, + cpi->frames[frame]->y_buffer + mb_y_offset, + cpi->frames[frame]->u_buffer + mb_uv_offset, + cpi->frames[frame]->v_buffer + mb_uv_offset, + cpi->frames[frame]->y_stride, + mb_uv_height, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.row, + mbd->mi[0]->bmi[0].as_mv[0].as_mv.col, + predictor, scale, + mb_col * 16, mb_row * 16); // Apply the filter (YUV) vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride, @@ -320,7 +302,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, // move to next pixel byte++; } - byte += stride - 16; } @@ -347,14 +328,11 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, // move to next pixel byte++; } - byte += stride - mb_uv_height; } - mb_y_offset += 16; mb_uv_offset += mb_uv_height; } - mb_y_offset += 16 * (f->y_stride - mb_cols); mb_uv_offset += mb_uv_height * (f->uv_stride - mb_cols); } @@ -366,79 +344,34 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { VP9_COMMON *const cm = &cpi->common; - int frame = 0; - - int frames_to_blur_backward = 0; - int frames_to_blur_forward = 0; int frames_to_blur = 0; int start_frame = 0; - int strength = cpi->active_arnr_strength; - int blur_type = cpi->oxcf.arnr_type; int max_frames = cpi->active_arnr_frames; - - const int num_frames_backward = distance; - const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead) - - (num_frames_backward + 1); + int frames_to_blur_backward = distance; + int frames_to_blur_forward = vp9_lookahead_depth(cpi->lookahead) + - (distance + 1); struct scale_factors sf; - switch (blur_type) { - case 1: - // Backward Blur - frames_to_blur_backward = num_frames_backward; - - if (frames_to_blur_backward >= max_frames) - frames_to_blur_backward = max_frames - 1; + // Determine which input frames to filter. + if (frames_to_blur_forward > frames_to_blur_backward) + frames_to_blur_forward = frames_to_blur_backward; - frames_to_blur = frames_to_blur_backward + 1; - break; + if (frames_to_blur_backward > frames_to_blur_forward) + frames_to_blur_backward = frames_to_blur_forward; - case 2: - // Forward Blur - frames_to_blur_forward = num_frames_forward; + // When max_frames is even we have 1 more frame backward than forward + if (frames_to_blur_forward > (max_frames - 1) / 2) + frames_to_blur_forward = (max_frames - 1) / 2; - if (frames_to_blur_forward >= max_frames) - frames_to_blur_forward = max_frames - 1; + if (frames_to_blur_backward > (max_frames / 2)) + frames_to_blur_backward = max_frames / 2; - frames_to_blur = frames_to_blur_forward + 1; - break; - - case 3: - default: - // Center Blur - frames_to_blur_forward = num_frames_forward; - frames_to_blur_backward = num_frames_backward; - - if (frames_to_blur_forward > frames_to_blur_backward) - frames_to_blur_forward = frames_to_blur_backward; - - if (frames_to_blur_backward > frames_to_blur_forward) - frames_to_blur_backward = frames_to_blur_forward; - - // When max_frames is even we have 1 more frame backward than forward - if (frames_to_blur_forward > (max_frames - 1) / 2) - frames_to_blur_forward = ((max_frames - 1) / 2); - - if (frames_to_blur_backward > (max_frames / 2)) - frames_to_blur_backward = (max_frames / 2); - - frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1; - break; - } + frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1; start_frame = distance + frames_to_blur_forward; -#ifdef DEBUGFWG - // DEBUG FWG - printf( - "max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d " - "start:%d", - max_frames, num_frames_backward, num_frames_forward, frames_to_blur, - frames_to_blur_backward, frames_to_blur_forward, cpi->source_encode_index, - cpi->last_alt_ref_sei, start_frame); -#endif - // Setup scaling factors. Scaling on each of the arnr frames is not supported vp9_setup_scale_factors_for_frame(&sf, get_frame_new_buffer(cm)->y_crop_width, @@ -447,7 +380,7 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { // Setup frame pointers, NULL indicates frame not included in filter vp9_zero(cpi->frames); - for (frame = 0; frame < frames_to_blur; frame++) { + for (frame = 0; frame < frames_to_blur; ++frame) { int which_buffer = start_frame - frame; struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, which_buffer); @@ -461,11 +394,11 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) { void vp9_configure_arnr_filter(VP9_COMP *cpi, const unsigned int frames_to_arnr, const int group_boost) { + int q; int half_gf_int; int frames_after_arf; - int frames_bwd = cpi->oxcf.arnr_max_frames - 1; - int frames_fwd = cpi->oxcf.arnr_max_frames - 1; - int q; + int frames_bwd; + int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1; // Define the arnr filter width for this group of frames. We only // filter frames that lie within a distance of half the GF interval @@ -477,47 +410,26 @@ void vp9_configure_arnr_filter(VP9_COMP *cpi, frames_after_arf = vp9_lookahead_depth(cpi->lookahead) - frames_to_arnr - 1; - switch (cpi->oxcf.arnr_type) { - case 1: // Backward filter - frames_fwd = 0; - if (frames_bwd > half_gf_int) - frames_bwd = half_gf_int; - break; - - case 2: // Forward filter - if (frames_fwd > half_gf_int) - frames_fwd = half_gf_int; - if (frames_fwd > frames_after_arf) - frames_fwd = frames_after_arf; - frames_bwd = 0; - break; - - case 3: // Centered filter - default: - frames_fwd >>= 1; - if (frames_fwd > frames_after_arf) - frames_fwd = frames_after_arf; - if (frames_fwd > half_gf_int) - frames_fwd = half_gf_int; - - frames_bwd = frames_fwd; - - // For even length filter there is one more frame backward - // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. - if (frames_bwd < half_gf_int) - frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1; - break; - } + if (frames_fwd > frames_after_arf) + frames_fwd = frames_after_arf; + if (frames_fwd > half_gf_int) + frames_fwd = half_gf_int; + + frames_bwd = frames_fwd; + + // For even length filter there is one more frame backward + // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. + if (frames_bwd < half_gf_int) + frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1; cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd; // Adjust the strength based on active max q if (cpi->common.current_video_frame > 1) - q = ((int)vp9_convert_qindex_to_q( - cpi->rc.avg_frame_qindex[INTER_FRAME])); + q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME])); else - q = ((int)vp9_convert_qindex_to_q( - cpi->rc.avg_frame_qindex[KEY_FRAME])); + q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME])); + if (q > 16) { cpi->active_arnr_strength = cpi->oxcf.arnr_strength; } else { diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 71867a938..a31a476f3 100644 --- a/vp9/encoder/vp9_variance.c +++ b/vp9/encoder/vp9_variance.c @@ -18,63 +18,34 @@ #include "vp9/encoder/vp9_variance.h" -void variance(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - int w, - int h, - unsigned int *sse, - int *sum) { +void variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, unsigned int *sse, int *sum) { int i, j; - int diff; *sum = 0; *sse = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { - diff = src_ptr[j] - ref_ptr[j]; + const int diff = a[j] - b[j]; *sum += diff; *sse += diff * diff; } - src_ptr += source_stride; - ref_ptr += recon_stride; + a += a_stride; + b += b_stride; } } -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_first_pass - * - * INPUTS : uint8_t *src_ptr : Pointer to source block. - * uint32_t src_pixels_per_line : Stride of input block. - * uint32_t pixel_step : Offset between filter input - * samples (see notes). - * uint32_t output_height : Input block height. - * uint32_t output_width : Input block width. - * int32_t *vp9_filter : Array of 2 bi-linear filter - * taps. - * - * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement first-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step= - * stride). - * It defines the offset required to move from one input - * to the next. - * - ****************************************************************************/ +// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. Used to implement +// first-pass of 2-D separable filter. +// +// Produces int32_t output to retain precision for next pass. Two filter taps +// should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is +// applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It +// defines the offset required to move from one input to the next. static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, uint16_t *output_ptr, unsigned int src_pixels_per_line, @@ -99,38 +70,14 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, } } -/**************************************************************************** - * - * ROUTINE : filter_block2d_bil_second_pass - * - * INPUTS : int32_t *src_ptr : Pointer to source block. - * uint32_t src_pixels_per_line : Stride of input block. - * uint32_t pixel_step : Offset between filter input - * samples (see notes). - * uint32_t output_height : Input block height. - * uint32_t output_width : Input block width. - * int32_t *vp9_filter : Array of 2 bi-linear filter - * taps. - * - * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. - * - * RETURNS : void - * - * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in - * either horizontal or vertical direction to produce the - * filtered output block. Used to implement second-pass - * of 2-D separable filter. - * - * SPECIAL NOTES : Requires 32-bit input as produced by - * filter_block2d_bil_first_pass. - * Two filter taps should sum to VP9_FILTER_WEIGHT. - * pixel_step defines whether the filter is applied - * horizontally (pixel_step=1) or vertically (pixel_step= - * stride). - * It defines the offset required to move from one input - * to the next. - * - ****************************************************************************/ +// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal +// or vertical direction to produce the filtered output block. Used to implement +// second-pass of 2-D separable filter. +// +// Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two +// filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the +// filter is applied horizontally (pixel_step=1) or vertically (pixel_step= +// stride). It defines the offset required to move from one input to the next. static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, uint8_t *output_ptr, unsigned int src_pixels_per_line, @@ -156,9 +103,8 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { unsigned int i, sum = 0; - for (i = 0; i < 256; i++) { - sum += (src_ptr[i] * src_ptr[i]); - } + for (i = 0; i < 256; i++) + sum += src_ptr[i] * src_ptr[i]; return sum; } @@ -183,12 +129,10 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); @@ -205,13 +149,11 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 64, hfilter); @@ -240,12 +182,10 @@ unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); @@ -262,13 +202,11 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 32, hfilter); @@ -297,12 +235,10 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); @@ -319,13 +255,11 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 32, hfilter); @@ -354,12 +288,10 @@ unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); @@ -376,13 +308,11 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 16, hfilter); @@ -582,17 +512,12 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse) { uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + uint16_t fdata3[5 * 4]; + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); - // First filter 1d Horizontal var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 4, hfilter); - - // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse); @@ -607,18 +532,13 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, unsigned int *sse, const uint8_t *second_pred) { uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer - uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); + uint16_t fdata3[5 * 4]; + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); - // First filter 1d Horizontal var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 4, hfilter); - - // Now filter Verticaly var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); @@ -631,17 +551,14 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[9 * 8]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); - return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -653,18 +570,17 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[9 * 8]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 8, hfilter); var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); + return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); } @@ -675,12 +591,10 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering + uint16_t fdata3[17 * 16]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 16, hfilter); @@ -699,11 +613,9 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, const uint8_t *second_pred) { uint16_t fdata3[17 * 16]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 16, hfilter); @@ -722,10 +634,8 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr, unsigned int *sse) { uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering uint8_t temp2[68 * 64]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); @@ -742,13 +652,11 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering + uint16_t fdata3[65 * 64]; uint8_t temp2[68 * 64]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 65, 64, hfilter); @@ -764,12 +672,10 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); @@ -786,13 +692,11 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering + uint16_t fdata3[33 * 32]; uint8_t temp2[36 * 32]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 33, 32, hfilter); @@ -928,12 +832,10 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering + uint16_t fdata3[16 * 9]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); @@ -950,13 +852,11 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering + uint16_t fdata3[16 * 9]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 16, hfilter); @@ -974,10 +874,8 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr, unsigned int *sse) { uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); @@ -994,13 +892,11 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering + uint16_t fdata3[9 * 16]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 17, 8, hfilter); @@ -1016,12 +912,10 @@ unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering + uint16_t fdata3[8 * 5]; uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); @@ -1038,13 +932,11 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering + uint16_t fdata3[8 * 5]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 5, 8, hfilter); @@ -1060,14 +952,12 @@ unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr, const uint8_t *dst_ptr, int dst_pixels_per_line, unsigned int *sse) { - uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[5 * 8]; // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be // of this big? same issue appears in all other block size settings. uint8_t temp2[20 * 16]; - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); @@ -1084,13 +974,11 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, int dst_pixels_per_line, unsigned int *sse, const uint8_t *second_pred) { - uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering + uint16_t fdata3[5 * 8]; uint8_t temp2[20 * 16]; - DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer - const int16_t *hfilter, *vfilter; - - hfilter = BILINEAR_FILTERS_2TAP(xoffset); - vfilter = BILINEAR_FILTERS_2TAP(yoffset); + DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); + const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset); + const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset); var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, 1, 9, 4, hfilter); @@ -1106,9 +994,8 @@ void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { - int tmp; - tmp = pred[j] + ref[j]; - comp_pred[j] = (tmp + 1) >> 1; + const int tmp = pred[j] + ref[j]; + comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); } comp_pred += width; pred += width; diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h index c9e39a1a2..4c8be71cd 100644 --- a/vp9/encoder/vp9_variance.h +++ b/vp9/encoder/vp9_variance.h @@ -17,14 +17,10 @@ extern "C" { #endif -void variance(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - int w, - int h, - unsigned int *sse, - int *sum); +void variance(const uint8_t *a, int a_stride, + const uint8_t *b, int b_stride, + int w, int h, + unsigned int *sse, int *sum); typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr, int source_stride, diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 87d4bb7c1..6e3c28090 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -30,7 +30,6 @@ struct vp9_extracfg { unsigned int tile_rows; unsigned int arnr_max_frames; unsigned int arnr_strength; - unsigned int arnr_type; vp8e_tuning tuning; unsigned int cq_level; // constrained quality level unsigned int rc_max_intra_bitrate_pct; @@ -60,7 +59,6 @@ static const struct extraconfig_map extracfg_map[] = { 0, // tile_rows 7, // arnr_max_frames 5, // arnr_strength - 3, // arnr_type VP8_TUNE_PSNR, // tuning 10, // cq_level 0, // rc_max_intra_bitrate_pct @@ -77,7 +75,7 @@ struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_enc_cfg_t cfg; struct vp9_extracfg extra_cfg; - VP9_CONFIG oxcf; + VP9EncoderConfig oxcf; VP9_COMP *cpi; unsigned char *cx_data; size_t cx_data_sz; @@ -203,7 +201,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK_HI(extra_cfg, sharpness, 7); RANGE_CHECK(extra_cfg, arnr_max_frames, 0, 15); RANGE_CHECK_HI(extra_cfg, arnr_strength, 6); - RANGE_CHECK(extra_cfg, arnr_type, 1, 3); RANGE_CHECK(extra_cfg, cq_level, 0, 63); // TODO(yaowu): remove this when ssim tuning is implemented for vp9 @@ -291,7 +288,7 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t set_encoder_config( - VP9_CONFIG *oxcf, + VP9EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { oxcf->profile = cfg->g_profile; @@ -318,13 +315,13 @@ static vpx_codec_err_t set_encoder_config( oxcf->lag_in_frames = cfg->g_pass == VPX_RC_FIRST_PASS ? 0 : cfg->g_lag_in_frames; - oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK; + oxcf->rc_mode = RC_MODE_VBR; if (cfg->rc_end_usage == VPX_CQ) - oxcf->end_usage = USAGE_CONSTRAINED_QUALITY; + oxcf->rc_mode = RC_MODE_CONSTRAINED_QUALITY; else if (cfg->rc_end_usage == VPX_Q) - oxcf->end_usage = USAGE_CONSTANT_QUALITY; + oxcf->rc_mode = RC_MODE_CONSTANT_QUALITY; else if (cfg->rc_end_usage == VPX_CBR) - oxcf->end_usage = USAGE_STREAM_FROM_SERVER; + oxcf->rc_mode = RC_MODE_CBR; oxcf->target_bandwidth = cfg->rc_target_bitrate; oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct; @@ -367,7 +364,6 @@ static vpx_codec_err_t set_encoder_config( oxcf->arnr_max_frames = extra_cfg->arnr_max_frames; oxcf->arnr_strength = extra_cfg->arnr_strength; - oxcf->arnr_type = extra_cfg->arnr_type; oxcf->tuning = extra_cfg->tuning; @@ -498,7 +494,6 @@ static vpx_codec_err_t ctrl_set_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, MAP(VP9E_SET_TILE_ROWS, extra_cfg.tile_rows); MAP(VP8E_SET_ARNR_MAXFRAMES, extra_cfg.arnr_max_frames); MAP(VP8E_SET_ARNR_STRENGTH, extra_cfg.arnr_strength); - MAP(VP8E_SET_ARNR_TYPE, extra_cfg.arnr_type); MAP(VP8E_SET_TUNING, extra_cfg.tuning); MAP(VP8E_SET_CQ_LEVEL, extra_cfg.cq_level); MAP(VP8E_SET_MAX_INTRA_BITRATE_PCT, extra_cfg.rc_max_intra_bitrate_pct); @@ -521,7 +516,8 @@ static vpx_codec_err_t ctrl_set_param(vpx_codec_alg_priv_t *ctx, int ctrl_id, #undef MAP } -static vpx_codec_err_t encoder_common_init(vpx_codec_ctx_t *ctx) { +static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) { vpx_codec_err_t res = VPX_CODEC_OK; if (ctx->priv == NULL) { @@ -529,7 +525,8 @@ static vpx_codec_err_t encoder_common_init(vpx_codec_ctx_t *ctx) { vpx_codec_enc_cfg_t *cfg; struct vpx_codec_alg_priv *priv = calloc(1, sizeof(*priv)); - if (priv == NULL) return VPX_CODEC_MEM_ERROR; + if (priv == NULL) + return VPX_CODEC_MEM_ERROR; ctx->priv = &priv->base; ctx->priv->sz = sizeof(*ctx->priv); @@ -539,8 +536,7 @@ static vpx_codec_err_t encoder_common_init(vpx_codec_ctx_t *ctx) { ctx->priv->enc.total_encoders = 1; if (ctx->config.enc) { - // Update the reference to the config structure to an - // internal copy. + // Update the reference to the config structure to an internal copy. ctx->priv->alg_priv->cfg = *ctx->config.enc; ctx->config.enc = &ctx->priv->alg_priv->cfg; } @@ -556,11 +552,11 @@ static vpx_codec_err_t encoder_common_init(vpx_codec_ctx_t *ctx) { priv->extra_cfg = extracfg_map[i].cfg; priv->extra_cfg.pkt_list = &priv->pkt_list.head; - - // Maximum buffer size approximated based on having multiple ARF. + // Maximum buffer size approximated based on having multiple ARF. priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 8; - if (priv->cx_data_sz < 4096) priv->cx_data_sz = 4096; + if (priv->cx_data_sz < 4096) + priv->cx_data_sz = 4096; priv->cx_data = (unsigned char *)malloc(priv->cx_data_sz); if (priv->cx_data == NULL) @@ -573,8 +569,8 @@ static vpx_codec_err_t encoder_common_init(vpx_codec_ctx_t *ctx) { if (res == VPX_CODEC_OK) { VP9_COMP *cpi; set_encoder_config(&ctx->priv->alg_priv->oxcf, - &ctx->priv->alg_priv->cfg, - &ctx->priv->alg_priv->extra_cfg); + &ctx->priv->alg_priv->cfg, + &ctx->priv->alg_priv->extra_cfg); cpi = vp9_create_compressor(&ctx->priv->alg_priv->oxcf); if (cpi == NULL) res = VPX_CODEC_MEM_ERROR; @@ -586,12 +582,6 @@ static vpx_codec_err_t encoder_common_init(vpx_codec_ctx_t *ctx) { return res; } - -static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, - vpx_codec_priv_enc_mr_cfg_t *data) { - return encoder_common_init(ctx); -} - static vpx_codec_err_t encoder_destroy(vpx_codec_alg_priv_t *ctx) { free(ctx->cx_data); vp9_remove_compressor(ctx->cpi); @@ -628,7 +618,8 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, } } - +// Turn on to test if supplemental superframe data breaks decoding +// #define TEST_SUPPLEMENTAL_SUPERFRAME_DATA static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { uint8_t marker = 0xc0; unsigned int mask; @@ -654,6 +645,20 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) { uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz; int i, j; +#ifdef TEST_SUPPLEMENTAL_SUPERFRAME_DATA + uint8_t marker_test = 0xc0; + int mag_test = 2; // 1 - 4 + int frames_test = 4; // 1 - 8 + int index_sz_test = 2 + mag_test * frames_test; + marker_test |= frames_test - 1; + marker_test |= (mag_test - 1) << 3; + *x++ = marker_test; + for (i = 0; i < mag_test * frames_test; ++i) + *x++ = 0; // fill up with arbitrary data + *x++ = marker_test; + ctx->pending_cx_data_sz += index_sz_test; + printf("Added supplemental superframe data\n"); +#endif *x++ = marker; for (i = 0; i < ctx->pending_frame_count; i++) { @@ -666,6 +671,9 @@ static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { } *x++ = marker; ctx->pending_cx_data_sz += index_sz; +#ifdef TEST_SUPPLEMENTAL_SUPERFRAME_DATA + index_sz += index_sz_test; +#endif } return index_sz; } @@ -866,7 +874,6 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, return res; } - static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter) { return vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter); @@ -1100,7 +1107,6 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP9E_SET_TILE_ROWS, ctrl_set_param}, {VP8E_SET_ARNR_MAXFRAMES, ctrl_set_param}, {VP8E_SET_ARNR_STRENGTH, ctrl_set_param}, - {VP8E_SET_ARNR_TYPE, ctrl_set_param}, {VP8E_SET_TUNING, ctrl_set_param}, {VP8E_SET_CQ_LEVEL, ctrl_set_param}, {VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_param}, @@ -363,7 +363,7 @@ static const arg_def_t frame_periodic_boost = ARG_DEF( static const arg_def_t *vp9_args[] = { &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh, - &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &arnr_type, + &tile_cols, &tile_rows, &arnr_maxframes, &arnr_strength, &tune_ssim, &cq_level, &max_intra_rate_pct, &lossless, &frame_parallel_decoding, &aq_mode, &frame_periodic_boost, NULL @@ -372,7 +372,7 @@ static const int vp9_arg_ctrl_map[] = { VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF, VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD, VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS, - VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, + VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, VP9E_SET_FRAME_PERIODIC_BOOST, |