diff options
Diffstat (limited to 'vp8')
-rw-r--r-- | vp8/common/onyx.h | 5 | ||||
-rw-r--r-- | vp8/common/postproc.c | 3 | ||||
-rw-r--r-- | vp8/decoder/decodframe.c | 1 | ||||
-rw-r--r-- | vp8/decoder/threading.c | 5 | ||||
-rw-r--r-- | vp8/encoder/encodeframe.c | 18 | ||||
-rw-r--r-- | vp8/encoder/encodemb.c | 1 | ||||
-rw-r--r-- | vp8/encoder/firstpass.c | 204 | ||||
-rw-r--r-- | vp8/encoder/generic/csystemdependent.c | 3 | ||||
-rw-r--r-- | vp8/encoder/mcomp.c | 4 | ||||
-rw-r--r-- | vp8/encoder/mcomp.h | 1 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 322 | ||||
-rw-r--r-- | vp8/encoder/onyx_int.h | 13 | ||||
-rw-r--r-- | vp8/encoder/pickinter.c | 6 | ||||
-rw-r--r-- | vp8/encoder/quantize.c | 1 | ||||
-rw-r--r-- | vp8/encoder/ratectrl.c | 15 | ||||
-rw-r--r-- | vp8/encoder/rdopt.c | 229 | ||||
-rw-r--r-- | vp8/encoder/temporal_filter.c | 64 | ||||
-rw-r--r-- | vp8/encoder/temporal_filter.h | 6 | ||||
-rw-r--r-- | vp8/encoder/x86/temporal_filter_apply_sse2.asm | 207 | ||||
-rw-r--r-- | vp8/encoder/x86/temporal_filter_x86.h | 27 | ||||
-rw-r--r-- | vp8/encoder/x86/x86_csystemdependent.c | 7 | ||||
-rw-r--r-- | vp8/vp8_cx_iface.c | 15 | ||||
-rw-r--r-- | vp8/vp8cx.mk | 2 |
23 files changed, 803 insertions, 356 deletions
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index 3724b11e0..426b8fc2b 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -46,7 +46,8 @@ extern "C" typedef enum { USAGE_STREAM_FROM_SERVER = 0x0, - USAGE_LOCAL_FILE_PLAYBACK = 0x1 + USAGE_LOCAL_FILE_PLAYBACK = 0x1, + USAGE_CONSTRAINED_QUALITY = 0x2 } END_USAGE; @@ -150,6 +151,7 @@ extern "C" int fixed_q; int worst_allowed_q; int best_allowed_q; + int cq_level; // allow internal resizing ( currently disabled in the build !!!!!) int allow_spatial_resampling; @@ -187,7 +189,6 @@ extern "C" int arnr_strength ; int arnr_type ; - struct vpx_fixed_buf two_pass_stats_in; struct vpx_codec_pkt_list *output_pkt_list; diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index 15b1c2c89..d30068ef5 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -680,7 +680,6 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags) { - char message[512]; int q = oci->filter_level * 10 / 6; int flags = ppflags->post_proc_flag; int deblock_level = ppflags->deblocking_level; @@ -744,6 +743,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t #if CONFIG_POSTPROC_VISUALIZER if (flags & VP8D_DEBUG_TXT_FRAME_INFO) { + char message[512]; sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d", (oci->frame_type == KEY_FRAME), oci->refresh_golden_frame, @@ -823,6 +823,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t if (flags & VP8D_DEBUG_TXT_RATE_INFO) { + char message[512]; sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate); vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride); } diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 4702faeed..9305a0556 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -485,7 +485,6 @@ static void 
setup_token_decoder(VP8D_COMP *pbi, static void stop_token_decoder(VP8D_COMP *pbi) { - int i; VP8_COMMON *pc = &pbi->common; if (pc->multi_token_partition != ONE_PARTITION) diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index fea4e1cc1..dac990a26 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -451,7 +451,6 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) #if CONFIG_MULTITHREAD int core_count = 0; int ithread; - int i; pbi->b_multithreaded_rd = 0; pbi->allocated_decoding_thread_count = 0; @@ -721,7 +720,6 @@ void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl) /*int mb_row; int mb_col; int baseline_filter_level[MAX_MB_SEGMENTS];*/ - int filter_level; int alt_flt_enabled = mbd->segmentation_enabled; int i; @@ -769,7 +767,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) int ibc = 0; int num_part = 1 << pbi->common.multi_token_partition; - int i, j; + int i; volatile int *last_row_current_mb_col = NULL; int nsync = pbi->sync_range; @@ -809,7 +807,6 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) { - int i; xd->current_bc = &pbi->mbc[mb_row%num_part]; diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index cb7cc65d7..4c95f28d6 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -408,7 +408,6 @@ unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) int sum; unsigned int a; unsigned int b; - unsigned int d; /* TODO: This could also be done over smaller areas (8x8), but that would * require extensive changes elsewhere, as lambda is assumed to be fixed * over an entire MB in most of the code. 
@@ -629,7 +628,6 @@ void vp8_encode_frame(VP8_COMP *cpi) VP8_COMMON *const cm = & cpi->common; MACROBLOCKD *const xd = & x->e_mbd; - int i; TOKENEXTRA *tp = cpi->tok; int segment_counts[MAX_MB_SEGMENTS]; int totalrate; @@ -712,9 +710,7 @@ void vp8_encode_frame(VP8_COMP *cpi) } vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); - //vp8_initialize_rd_consts( cpi, vp8_dc_quant(cpi->avg_frame_qindex, cm->y1dc_delta_q) ); vp8cx_initialize_me_consts(cpi, cm->base_qindex); - //vp8cx_initialize_me_consts( cpi, cpi->avg_frame_qindex); // Copy data over into macro block data sturctures. @@ -734,20 +730,6 @@ void vp8_encode_frame(VP8_COMP *cpi) x->activity_sum = 0; -#if 0 - // Experimental rd code - // 2 Pass - Possibly set Rdmult based on last frame distortion + this frame target bits or other metrics - // such as cpi->rate_correction_factor that indicate relative complexity. - /*if ( cpi->pass == 2 && (cpi->last_frame_distortion > 0) && (cpi->target_bits_per_mb > 0) ) - { - //x->rdmult = ((cpi->last_frame_distortion * 256)/cpi->common.MBs)/ cpi->target_bits_per_mb; - x->rdmult = (int)(cpi->RDMULT * cpi->rate_correction_factor); - } - else - x->rdmult = cpi->RDMULT; */ - //x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 )); -#endif - xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_context->mbmi.uv_mode = DC_PRED; diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index 464d4a236..efcea745b 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -273,7 +273,6 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type, int x; int sz; int next; - int path; int rdmult; int rddiv; int final_eob; diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index da4d740cb..32a39c5f2 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -58,6 +58,7 @@ extern const int vp8_gf_boost_qadjustment[QINDEX_RANGE]; #define KF_MB_INTRA_MIN 300 #define GF_MB_INTRA_MIN 200 + #define 
DOUBLE_DIVIDE_CHECK(X) ((X)<0?(X)-.000001:(X)+.000001) #define POW1 (double)cpi->oxcf.two_pass_vbrbias/100.0 @@ -67,6 +68,18 @@ static int vscale_lookup[7] = {0, 1, 1, 2, 2, 3, 3}; static int hscale_lookup[7] = {0, 0, 1, 1, 2, 2, 3}; +const int cq_level[QINDEX_RANGE] = +{ + 0,0,1,1,2,3,3,4,4,5,6,6,7,8,8,9, + 9,10,11,11,12,13,13,14,15,15,16,17,17,18,19,20, + 20,21,22,22,23,24,24,25,26,27,27,28,29,30,30,31, + 32,33,33,34,35,36,36,37,38,39,39,40,41,42,42,43, + 44,45,46,46,47,48,49,50,50,51,52,53,54,55,55,56, + 57,58,59,60,60,61,62,63,64,65,66,67,67,68,69,70, + 71,72,73,74,75,75,76,77,78,79,80,81,82,83,84,85, + 86,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100 +}; + void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame); int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps); @@ -249,7 +262,6 @@ extern size_t vp8_firstpass_stats_sz(unsigned int mb_count) * macroblock. */ size_t stats_sz; - FIRSTPASS_STATS stats; stats_sz = sizeof(FIRSTPASS_STATS) + mb_count; stats_sz = (stats_sz + 7) & ~7; @@ -376,8 +388,6 @@ unsigned char *vp8_fpmm_get_pos(VP8_COMP *cpi) } void vp8_fpmm_reset_pos(VP8_COMP *cpi, unsigned char *target_pos) { - int Offset; - cpi->fp_motion_map_stats = target_pos; } @@ -907,7 +917,7 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_ double pow_lowq = 0.40; if (section_target_bandwitdh <= 0) - return MAXQ; + return cpi->maxq_max_limit; // Highest value allowed target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); @@ -943,10 +953,12 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_ // Correction factor used for Q values >= 20 corr_high = pow(err_per_mb / BASE_ERRPERMB, pow_highq); - corr_high = (corr_high < 0.05) ? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high; + corr_high = (corr_high < 0.05) + ? 0.05 : (corr_high > 5.0) ? 
5.0 : corr_high; - // Try and pick a Q that should be high enough to encode the content at the given rate. - for (Q = 0; Q < MAXQ; Q++) + // Try and pick a max Q that will be high enough to encode the + // content at the given rate. + for (Q = cpi->maxq_min_limit; Q < cpi->maxq_max_limit; Q++) { int bits_per_mb_at_this_q; @@ -965,6 +977,28 @@ static int estimate_max_q(VP8_COMP *cpi, double section_err, int section_target_ break; } + // Restriction on active max q for constrained quality mode. + if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (Q < cpi->cq_target_quality) ) + //(Q < cpi->oxcf.cq_level;) ) + { + Q = cpi->cq_target_quality; + //Q = cpi->oxcf.cq_level; + } + + // Adjust maxq_min_limit and maxq_max_limit limits based on + // averaga q observed in clip for non kf/gf.arf frames + // Give average a chance to settle though. + if ( (cpi->ni_frames > + ((unsigned int)cpi->total_stats->count >> 8)) && + (cpi->ni_frames > 150) ) + { + cpi->maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality) + ? (cpi->ni_av_qi + 32) : cpi->worst_quality; + cpi->maxq_min_limit = ((cpi->ni_av_qi - 32) > cpi->best_quality) + ? (cpi->ni_av_qi - 32) : cpi->best_quality; + } + return Q; } static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, int Height, int Width) @@ -1113,6 +1147,79 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta return Q; } + +// For cq mode estimate a cq level that matches the observed +// complexity and data rate. 
+static int estimate_cq(VP8_COMP *cpi, double section_err, + int section_target_bandwitdh, int Height, int Width) +{ + int Q; + int num_mbs = ((Height * Width) / (16 * 16)); + int target_norm_bits_per_mb; + + double err_per_mb = section_err / num_mbs; + double correction_factor; + double corr_high; + double speed_correction = 1.0; + double pow_highq = 0.90; + double pow_lowq = 0.40; + double clip_iiratio; + double clip_iifactor; + + target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) + ? (512 * section_target_bandwitdh) / num_mbs + : 512 * (section_target_bandwitdh / num_mbs); + + // Corrections for higher compression speed settings + // (reduced compression expected) + if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) + { + if (cpi->oxcf.cpu_used <= 5) + speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); + else + speed_correction = 1.25; + } + // II ratio correction factor for clip as a whole + clip_iiratio = cpi->total_stats->intra_error / + DOUBLE_DIVIDE_CHECK(cpi->total_stats->coded_error); + clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); + if (clip_iifactor < 0.80) + clip_iifactor = 0.80; + + // Correction factor used for Q values >= 20 + corr_high = pow(err_per_mb / BASE_ERRPERMB, pow_highq); + corr_high = (corr_high < 0.05) ? 0.05 : (corr_high > 5.0) ? 5.0 : corr_high; + + // Try and pick a Q that can encode the content at the given rate. + for (Q = 0; Q < MAXQ; Q++) + { + int bits_per_mb_at_this_q; + + if (Q < 50) + { + correction_factor = + pow( err_per_mb / BASE_ERRPERMB, (pow_lowq + Q * 0.01)); + + correction_factor = (correction_factor < 0.05) ? 0.05 + : (correction_factor > 5.0) ? 
5.0 + : correction_factor; + } + else + correction_factor = corr_high; + + bits_per_mb_at_this_q = + (int)( .5 + correction_factor * + speed_correction * + clip_iifactor * + (double)vp8_bits_per_mb[INTER_FRAME][Q] / 1.0); + + if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) + break; + } + + return cq_level[Q]; +} + extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate); void vp8_init_second_pass(VP8_COMP *cpi) @@ -1268,7 +1375,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) // what level of boost is appropriate for the GF or ARF that will be coded with the group i = 0; - while (((i < cpi->max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key)) + while (((i < cpi->static_scene_max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key)) { double r; double this_frame_mvr_ratio; @@ -1378,18 +1485,20 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) boost_score += (decay_accumulator * r); // Break out conditions. 
- if ( /* i>4 || */ + if ( /* i>4 || */ + // Break at cpi->max_gf_interval unless almost totally static + (i >= cpi->max_gf_interval && (loop_decay_rate < 0.99)) || ( - (i > MIN_GF_INTERVAL) && // Dont break out with a very short interval - ((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) && // Dont break out very close to a key frame + // Dont break out with a very short interval + (i > MIN_GF_INTERVAL) && + // Dont break out very close to a key frame + ((cpi->frames_to_key - i) >= MIN_GF_INTERVAL) && ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) && ((mv_ratio_accumulator > 100.0) || (abs_mv_in_out_accumulator > 3.0) || (mv_in_out_accumulator < -2.0) || - ((boost_score - old_boost_score) < 2.0) - ) - ) - ) + ((boost_score - old_boost_score) < 2.0)) + ) ) { boost_score = old_boost_score; break; @@ -1766,7 +1875,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) vp8_avg_stats(&sectionstats); - cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); + cpi->section_intra_rating = + sectionstats.intra_error / + DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); //if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) @@ -1993,21 +2104,48 @@ void vp8_second_pass(VP8_COMP *cpi) if (cpi->common.current_video_frame == 0) { - // guess at 2nd pass q cpi->est_max_qcorrection_factor = 1.0; - tmp_q = estimate_max_q(cpi, (cpi->total_coded_error_left / frames_left), (int)(cpi->bits_left / frames_left), cpi->common.Height, cpi->common.Width); - if (tmp_q < cpi->worst_quality) - { - cpi->active_worst_quality = tmp_q; - cpi->ni_av_qi = tmp_q; - } - else + // Experimental code to try and set a cq_level in constrained + // quality mode. 
+ if ( cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY ) { - cpi->active_worst_quality = cpi->worst_quality; - cpi->ni_av_qi = cpi->worst_quality; + int est_cq; + + est_cq = + estimate_cq( cpi, + (cpi->total_coded_error_left / frames_left), + (int)(cpi->bits_left / frames_left), + cpi->common.Height, cpi->common.Width); + + cpi->cq_target_quality = cpi->oxcf.cq_level; + if ( est_cq > cpi->cq_target_quality ) + cpi->cq_target_quality = est_cq; } + + // guess at maxq needed in 2nd pass + cpi->maxq_max_limit = cpi->worst_quality; + cpi->maxq_min_limit = cpi->best_quality; + tmp_q = estimate_max_q( cpi, + (cpi->total_coded_error_left / frames_left), + (int)(cpi->bits_left / frames_left), + cpi->common.Height, + cpi->common.Width); + + // Limit the maxq value returned subsequently. + // This increases the risk of overspend or underspend if the initial + // estimate for the clip is bad, but helps prevent excessive + // variation in Q, especially near the end of a clip + // where for example a small overspend may cause Q to crash + cpi->maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality) + ? (tmp_q + 32) : cpi->worst_quality; + cpi->maxq_min_limit = ((tmp_q - 32) > cpi->best_quality) + ? 
(tmp_q - 32) : cpi->best_quality; + + cpi->active_worst_quality = tmp_q; + cpi->ni_av_qi = tmp_q; } + // The last few frames of a clip almost always have to few or too many // bits and for the sake of over exact rate control we dont want to make // radical adjustments to the allowed quantizer range just to use up a @@ -2029,13 +2167,6 @@ void vp8_second_pass(VP8_COMP *cpi) cpi->active_worst_quality --; cpi->active_worst_quality = ((cpi->active_worst_quality * 3) + tmp_q + 2) / 4; - - // Clamp to user set limits - if (cpi->active_worst_quality > cpi->worst_quality) - cpi->active_worst_quality = cpi->worst_quality; - else if (cpi->active_worst_quality < cpi->best_quality) - cpi->active_worst_quality = cpi->best_quality; - } cpi->frames_to_key --; @@ -2157,6 +2288,9 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->common.frame_type = KEY_FRAME; + // is this a forced key frame by interval + cpi->this_key_frame_forced = cpi->next_key_frame_forced; + // Clear the alt ref active flag as this can never be active on a key frame cpi->source_alt_ref_active = FALSE; @@ -2219,7 +2353,11 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) kf_group_err /= 2.0; kf_group_intra_err /= 2.0; kf_group_coded_err /= 2.0; + + cpi->next_key_frame_forced = TRUE; } + else + cpi->next_key_frame_forced = FALSE; // Special case for the last frame of the file if (cpi->stats_in >= cpi->stats_in_end) diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index be00d0218..4738a5b28 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -91,8 +91,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; - +#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.search.full_search = vp8_full_search_sad; +#endif cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; 
cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index 9b91739cc..d9923fbe9 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -408,6 +408,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse); break; case 3: + default: this_mv.col += 4; this_mv.row += 4; diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse); @@ -1387,8 +1388,6 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er else return INT_MAX; } -#endif - int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2], MV *center_mv) { @@ -1541,6 +1540,7 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er else return INT_MAX; } +#endif /* !(CONFIG_REALTIME_ONLY) */ #ifdef ENTROPY_STATS void print_mode_context(void) diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index 122debcae..7600f87fc 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -24,7 +24,6 @@ extern void accum_mv_refs(MB_PREDICTION_MODE, const int near_mv_ref_cts[4]); #define MAX_MVSEARCH_STEPS 8 // The maximum number of steps in a step search given the largest allowed initial step #define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS+3)) - 8) // Max full pel mv specified in 1/8 pel units #define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS-1)) // Maximum size of the first step in full pel units -#define MAX_POSSIBLE_MV (1 << 11) // Maximum MV in 1/8 pel units extern void print_mode_context(void); extern int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight); diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 56f7ef6f8..77fbb29b1 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -155,25 +155,25 @@ extern const int 
vp8cx_base_skip_false_prob[128]; // Tables relating active max Q to active min Q static const int kf_low_motion_minq[QINDEX_RANGE] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, - 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 10,10, - 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18, - 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26, - 27,27,28,28,29,29,30,30,31,32,33,34,35,36,37,38, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, + 3,3,3,3,3,3,4,4,4,5,5,5,5,5,6,6, + 6,6,7,7,8,8,8,8,9,9,10,10,10,10,11,11, + 11,11,12,12,13,13,13,13,14,14,15,15,15,15,16,16, + 16,16,17,17,18,18,18,18,19,20,20,21,21,22,23,23 }; static const int kf_high_motion_minq[QINDEX_RANGE] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, - 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, - 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10,10, - 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18, - 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26, - 27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34, - 35,35,36,36,37,38,39,40,41,42,43,44,45,46,47,48, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,2,2,2,2,3,3,3,3, + 3,3,3,3,4,4,4,4,5,5,5,5,5,5,6,6, + 6,6,7,7,8,8,8,8,9,9,10,10,10,10,11,11, + 11,11,12,12,13,13,13,13,14,14,15,15,15,15,16,16, + 16,16,17,17,18,18,18,18,19,19,20,20,20,20,21,21, + 21,21,22,22,23,23,24,25,25,26,26,27,28,28,29,30 }; static const int gf_low_motion_minq[QINDEX_RANGE] = { @@ -195,7 +195,7 @@ static const int gf_mid_motion_minq[QINDEX_RANGE] = 22,22,23,23,24,24,25,25,26,26,27,27,28,28,29,29, 30,30,31,31,32,32,33,33,34,34,35,35,36,36,37,37, 38,39,39,40,40,41,41,42,42,43,43,44,45,46,47,48, - 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64, + 
49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64 }; static const int gf_high_motion_minq[QINDEX_RANGE] = { @@ -206,7 +206,7 @@ static const int gf_high_motion_minq[QINDEX_RANGE] = 25,25,26,26,27,27,28,28,29,29,30,30,31,31,32,32, 33,33,34,34,35,35,36,36,37,37,38,38,39,39,40,40, 41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54, - 55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80, + 55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80 }; static const int inter_minq[QINDEX_RANGE] = { @@ -314,7 +314,7 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi) vpx_free(cpi->tok); cpi->tok = 0; - // Structure used to minitor GF useage + // Structure used to monitor GF usage if (cpi->gf_active_flags != 0) vpx_free(cpi->gf_active_flags); @@ -325,6 +325,7 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi) cpi->mb.pip = 0; +#if !(CONFIG_REALTIME_ONLY) if(cpi->total_stats) vpx_free(cpi->total_stats); @@ -334,6 +335,7 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi) vpx_free(cpi->this_frame_stats); cpi->this_frame_stats = 0; +#endif } static void enable_segmentation(VP8_PTR ptr) @@ -576,7 +578,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) cpi->mbs_tested_so_far = 0; - // best quality + // best quality defaults sf->RD = 1; sf->search_method = NSTEP; sf->improved_quant = 1; @@ -592,6 +594,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->iterative_sub_pixel = 1; sf->optimize_coefficients = 1; sf->use_fastquant_for_pick = 0; + sf->no_skip_block4x4_search = 1; sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; @@ -794,6 +797,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->first_step = 1; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; + sf->no_skip_block4x4_search = 0; } if (Speed > 1) @@ -1268,6 +1272,15 @@ void vp8_set_speed_features(VP8_COMP *cpi) }; + // Slow quant, dct and trellis not worthwhile for first pass + // so make sure they are always turned off. 
+ if ( cpi->pass == 1 ) + { + sf->improved_quant = 0; + sf->optimize_coefficients = 0; + sf->improved_dct = 0; + } + if (cpi->sf.search_method == NSTEP) { vp8_init3smotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride); @@ -1437,6 +1450,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) cpi->gf_active_count = cm->mb_rows * cm->mb_cols; +#if !(CONFIG_REALTIME_ONLY) if(cpi->total_stats) vpx_free(cpi->total_stats); @@ -1450,6 +1464,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) if(!cpi->total_stats || !cpi->this_frame_stats) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate firstpass stats"); +#endif } @@ -1486,21 +1501,28 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); cpi->av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); - cpi->max_gf_interval = (int)(cpi->output_frame_rate / 2) + 2; - //cpi->max_gf_interval = (int)(cpi->output_frame_rate * 2 / 3) + 1; - //cpi->max_gf_interval = 24; + // Set Maximum gf/arf interval + cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2); - if (cpi->max_gf_interval < 12) + if(cpi->max_gf_interval < 12) cpi->max_gf_interval = 12; + // Extended interval for genuinely static scenes + cpi->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; - // Special conditions when altr ref frame enabled in lagged compress mode + // Special conditions when altr ref frame enabled in lagged compress mode if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) { if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1; + + if (cpi->static_scene_max_gf_interval > cpi->oxcf.lag_in_frames - 1) + cpi->static_scene_max_gf_interval = cpi->oxcf.lag_in_frames - 1; } + + if ( cpi->max_gf_interval > 
cpi->static_scene_max_gf_interval ) + cpi->max_gf_interval = cpi->static_scene_max_gf_interval; } @@ -1540,6 +1562,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->auto_worst_q = 0; cpi->oxcf.best_allowed_q = MINQ; cpi->oxcf.worst_allowed_q = MAXQ; + cpi->oxcf.cq_level = MINQ; cpi->oxcf.end_usage = USAGE_STREAM_FROM_SERVER; cpi->oxcf.starting_buffer_level = 4000; @@ -1640,6 +1663,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; + cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; if (oxcf->fixed_q >= 0) { @@ -1729,6 +1753,8 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q; cpi->best_quality = cpi->oxcf.best_allowed_q; cpi->active_best_quality = cpi->oxcf.best_allowed_q; + cpi->cq_target_quality = cpi->oxcf.cq_level; + cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE; cpi->rolling_target_bits = cpi->av_per_frame_bandwidth; @@ -1925,6 +1951,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; + cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; if (oxcf->fixed_q >= 0) { @@ -2017,6 +2044,8 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->active_best_quality = cpi->oxcf.best_allowed_q; cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE; + cpi->cq_target_quality = cpi->oxcf.cq_level; + cpi->rolling_target_bits = cpi->av_per_frame_bandwidth; cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth; cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth; @@ -2258,6 +2287,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->frames_since_key = 8; // Give a sensible default for the first frame. 
cpi->key_frame_frequency = cpi->oxcf.key_freq; + cpi->this_key_frame_forced = FALSE; + cpi->next_key_frame_forced = FALSE; cpi->source_alt_ref_pending = FALSE; cpi->source_alt_ref_active = FALSE; @@ -3081,9 +3112,6 @@ static int pick_frame_size(VP8_COMP *cpi) } } - // Note target_size in bits * 256 per MB - cpi->target_bits_per_mb = (cpi->this_frame_target * 256) / cpi->common.MBs; - return 1; } static void set_quantizer(VP8_COMP *cpi, int Q) @@ -3133,8 +3161,8 @@ static void update_alt_ref_frame_and_stats(VP8_COMP *cpi) // Update data structure that monitors level of reference to last GF vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; - // this frame refreshes means next frames don't unless specified by user + // this frame refreshes means next frames don't unless specified by user cpi->common.frames_since_golden = 0; // Clear the alternate reference update pending flag. @@ -3510,8 +3538,25 @@ static BOOL recode_loop_test( VP8_COMP *cpi, { force_recode = TRUE; } - // Specific rate control mode related tests - // TBD + // Special Constrained quality tests + else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) + { + // Undershoot and below auto cq level + if ( (q > cpi->cq_target_quality) && + (cpi->projected_frame_size < + ((cpi->this_frame_target * 7) >> 3))) + { + force_recode = TRUE; + } + // Severe undershoot and between auto and user cq level + else if ( (q > cpi->oxcf.cq_level) && + (cpi->projected_frame_size < cpi->min_frame_bandwidth) && + (cpi->active_best_quality > cpi->oxcf.cq_level)) + { + force_recode = TRUE; + cpi->active_best_quality = cpi->oxcf.cq_level; + } + } } return force_recode; @@ -3778,51 +3823,85 @@ static void encode_frame_to_data_rate } // Set an active best quality and if necessary active worst quality - if (cpi->pass == 2 || (cm->current_video_frame > 150)) + // There is some odd behaviour for one pass here that needs attention. 
+ if ( (cpi->pass == 2) || (cpi->ni_frames > 150)) { - int Q; - int i; - int bpm_target; - //int tmp; - vp8_clear_system_state(); Q = cpi->active_worst_quality; - if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) + if ( cm->frame_type == KEY_FRAME ) { - if (cm->frame_type != KEY_FRAME) + if ( cpi->pass == 2 ) { - if (cpi->avg_frame_qindex < cpi->active_worst_quality) - Q = cpi->avg_frame_qindex; + if (cpi->gfu_boost > 600) + cpi->active_best_quality = kf_low_motion_minq[Q]; + else + cpi->active_best_quality = kf_high_motion_minq[Q]; - if ( cpi->gfu_boost > 1000 ) + // Special case for key frames forced because we have reached + // the maximum key frame interval. Here force the Q to a range + // based on the ambient Q to reduce the risk of popping + if ( cpi->this_key_frame_forced ) + { + if ( cpi->active_best_quality > cpi->avg_frame_qindex * 7/8) + cpi->active_best_quality = cpi->avg_frame_qindex * 7/8; + else if ( cpi->active_best_quality < cpi->avg_frame_qindex >> 2 ) + cpi->active_best_quality = cpi->avg_frame_qindex >> 2; + } + } + // One pass more conservative + else + cpi->active_best_quality = kf_high_motion_minq[Q]; + } + + else if (cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) + { + // Use the lower of cpi->active_worst_quality and recent + // average Q as basis for GF/ARF Q limit unless last frame was + // a key frame. 
+ if ( (cpi->frames_since_key > 1) && + (cpi->avg_frame_qindex < cpi->active_worst_quality) ) + { + Q = cpi->avg_frame_qindex; + + if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (Q < cpi->oxcf.cq_level) ) + { + Q = cpi->oxcf.cq_level; + } + } + + if ( cpi->pass == 2 ) + { + if ( cpi->gfu_boost > 1000 ) cpi->active_best_quality = gf_low_motion_minq[Q]; else if ( cpi->gfu_boost < 400 ) cpi->active_best_quality = gf_high_motion_minq[Q]; else cpi->active_best_quality = gf_mid_motion_minq[Q]; - - /*cpi->active_best_quality = gf_arf_minq[Q]; - tmp = (cpi->gfu_boost > 1000) ? 600 : cpi->gfu_boost - 400; - //tmp = (cpi->gfu_boost > 1000) ? 600 : - //(cpi->gfu_boost < 400) ? 0 : cpi->gfu_boost - 400; - tmp = 128 - (tmp >> 4); - cpi->active_best_quality = (cpi->active_best_quality * tmp)>>7;*/ - - } - // KEY FRAMES - else - { - if (cpi->gfu_boost > 600) - cpi->active_best_quality = kf_low_motion_minq[Q]; - else - cpi->active_best_quality = kf_high_motion_minq[Q]; - } + } + // One pass more conservative + else + cpi->active_best_quality = gf_high_motion_minq[Q]; } else { cpi->active_best_quality = inter_minq[Q]; + + // For the constant/constrained quality mode we dont want + // the quality to rise above the cq level. + if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (cpi->active_best_quality < cpi->cq_target_quality) ) + { + // If we are strongly undershooting the target rate in the last + // frames then use the user passed in cq value not the auto + // cq value. 
+ if ( cpi->rolling_actual_bits < cpi->min_frame_bandwidth ) + cpi->active_best_quality = cpi->oxcf.cq_level; + else + cpi->active_best_quality = cpi->cq_target_quality; + } } // If CBR and the buffer is as full then it is reasonable to allow higher quality on the frames @@ -3840,7 +3919,6 @@ static void encode_frame_to_data_rate cpi->active_best_quality -= min_qadjustment; } - } } @@ -4101,9 +4179,44 @@ static void encode_frame_to_data_rate active_worst_qchanged = FALSE; #if !(CONFIG_REALTIME_ONLY) + // Special case handling for forced key frames + if ( (cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced ) + { + int last_q = Q; + int kf_err = vp8_calc_ss_err(cpi->Source, + &cm->yv12_fb[cm->new_fb_idx], + IF_RTCD(&cpi->rtcd.variance)); + + // The key frame is not good enough + if ( kf_err > ((cpi->ambient_err * 3) >> 2) ) + { + // Lower q_high + q_high = (Q > q_low) ? (Q - 1) : q_low; + + // Adjust Q + Q = (q_high + q_low) >> 1; + } + // The key frame is much better than the previous frame + else if ( kf_err < (cpi->ambient_err >> 1) ) + { + // Raise q_low + q_low = (Q < q_high) ? (Q + 1) : q_high; + + // Adjust Q + Q = (q_high + q_low + 1) >> 1; + } + + // Clamp Q to upper and lower limits: + if (Q > q_high) + Q = q_high; + else if (Q < q_low) + Q = q_low; + + Loop = ((Q != last_q)) ? TRUE : FALSE; + } // Is the projected frame size out of range and are we allowed to attempt to recode. - if ( recode_loop_test( cpi, + else if ( recode_loop_test( cpi, frame_over_shoot_limit, frame_under_shoot_limit, Q, top_index, bottom_index ) ) { @@ -4119,7 +4232,7 @@ static void encode_frame_to_data_rate //if ( cpi->zbin_over_quant == 0 ) q_low = (Q < q_high) ? (Q + 1) : q_high; // Raise Qlow as to at least the current value - if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low + if (cpi->zbin_over_quant > 0) // If we are using over quant do the same for zbin_oq_low zbin_oq_low = (cpi->zbin_over_quant < zbin_oq_high) ? 
(cpi->zbin_over_quant + 1) : zbin_oq_high; //if ( undershoot_seen || (Q == MAXQ) ) @@ -4188,6 +4301,16 @@ static void encode_frame_to_data_rate Q = vp8_regulate_q(cpi, cpi->this_frame_target); + // Special case reset for qlow for constrained quality. + // This should only trigger where there is very substantial + // undershoot on a frame and the auto cq level is above + // the user passsed in value. + if ( (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && + (Q < q_low) ) + { + q_low = Q; + } + while (((Q > q_high) || (cpi->zbin_over_quant > zbin_oq_high)) && (Retries < 10)) { vp8_update_rate_correction_factors(cpi, 0); @@ -4252,9 +4375,15 @@ static void encode_frame_to_data_rate } #endif - // Update the GF useage maps. - // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter - vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); + // Special case code to reduce pulsing when key frames are forced at a + // fixed interval. Note the reconstruction error if it is the frame before + // the force key frame + if ( cpi->next_key_frame_forced && (cpi->frames_to_key == 0) ) + { + cpi->ambient_err = vp8_calc_ss_err(cpi->Source, + &cm->yv12_fb[cm->new_fb_idx], + IF_RTCD(&cpi->rtcd.variance)); + } // This frame's MVs are saved and will be used in next frame's MV prediction. if(cm->show_frame) //do not save for altref frame @@ -4281,7 +4410,6 @@ static void encode_frame_to_data_rate } } - // Update the GF useage maps. // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); @@ -4312,8 +4440,6 @@ static void encode_frame_to_data_rate else cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; - - //#pragma omp parallel sections { @@ -4421,9 +4547,7 @@ static void encode_frame_to_data_rate } // Keep a record of ambient average Q. 
- if (cm->frame_type == KEY_FRAME) - cpi->avg_frame_qindex = cm->base_qindex; - else + if (cm->frame_type != KEY_FRAME) cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; // Keep a record from which we can calculate the average Q excluding GF updates and key frames @@ -4431,30 +4555,38 @@ static void encode_frame_to_data_rate { cpi->ni_frames++; - // Calculate the average Q for normal inter frames (not key or GFU frames) - // This is used as a basis for setting active worst quality. - if (cpi->ni_frames > 150) + // Calculate the average Q for normal inter frames (not key or GFU + // frames). + if ( cpi->pass == 2 ) { cpi->ni_tot_qi += Q; cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); } - // Early in the clip ... average the current frame Q value with the default - // entered by the user as a dampening measure else { - cpi->ni_tot_qi += Q; - cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2; - } - - // If the average Q is higher than what was used in the last frame - // (after going through the recode loop to keep the frame size within range) - // then use the last frame value - 1. - // The -1 is designed to stop Q and hence the data rate, from progressively - // falling away during difficult sections, but at the same time reduce the number of - // itterations around the recode loop. - if (Q > cpi->ni_av_qi) - cpi->ni_av_qi = Q - 1; + // Damp value for first few frames + if (cpi->ni_frames > 150 ) + { + cpi->ni_tot_qi += Q; + cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); + } + // For one pass, early in the clip ... 
average the current frame Q + // value with the worstq entered by the user as a dampening measure + else + { + cpi->ni_tot_qi += Q; + cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2; + } + // If the average Q is higher than what was used in the last frame + // (after going through the recode loop to keep the frame size within range) + // then use the last frame value - 1. + // The -1 is designed to stop Q and hence the data rate, from progressively + // falling away during difficult sections, but at the same time reduce the number of + // itterations around the recode loop. + if (Q > cpi->ni_av_qi) + cpi->ni_av_qi = Q - 1; + } } #if 0 @@ -4548,7 +4680,7 @@ static void encode_frame_to_data_rate if (cpi->total_coded_error_left != 0.0) fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld" - "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" + "%6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" "%10.3f %8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, @@ -4557,7 +4689,8 @@ static void encode_frame_to_data_rate (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, - cpi->avg_frame_qindex, cpi->zbin_over_quant, + cpi->ni_av_qi, cpi->cq_target_quality, cpi->zbin_over_quant, + //cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, @@ -4566,7 +4699,7 @@ static void encode_frame_to_data_rate cpi->tot_recode_hits); else fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld" - "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" + "%6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" "%8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, @@ -4575,7 +4708,8 @@ static void encode_frame_to_data_rate 
(cpi->oxcf.starting_buffer_level-cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, - cpi->avg_frame_qindex, cpi->zbin_over_quant, + cpi->ni_av_qi, cpi->cq_target_quality, cpi->zbin_over_quant, + //cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, @@ -4807,7 +4941,9 @@ extern void vp8_pop_neon(INT64 *store); #endif int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time) { +#if HAVE_ARMV7 INT64 store_reg[8]; +#endif VP8_COMP *cpi = (VP8_COMP *) ptr; VP8_COMMON *cm = &cpi->common; struct vpx_usec_timer timer; @@ -4910,7 +5046,9 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON } int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush) { +#if HAVE_ARMV7 INT64 store_reg[8]; +#endif VP8_COMP *cpi = (VP8_COMP *) ptr; VP8_COMMON *cm = &cpi->common; struct vpx_usec_timer tsctimer; diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 2f9cc4776..8a97e983b 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -183,6 +183,7 @@ typedef struct int optimize_coefficients; int use_fastquant_for_pick; + int no_skip_block4x4_search; } SPEED_FEATURES; @@ -316,7 +317,11 @@ typedef struct unsigned int frames_since_key; unsigned int key_frame_frequency; - unsigned int next_key; + unsigned int this_key_frame_forced; + unsigned int next_key_frame_forced; + + // Ambient reconstruction err target for force key frames + int ambient_err; unsigned int mode_check_freq[MAX_MODES]; unsigned int mode_test_hit_counts[MAX_MODES]; @@ -365,7 +370,6 @@ typedef struct int this_frame_target; int projected_frame_size; int last_q[2]; // Separate values for Intra/Inter - int 
target_bits_per_mb; double rate_correction_factor; double key_frame_rate_correction_factor; @@ -398,6 +402,7 @@ typedef struct int kf_overspend_bits; // Extra bits spent on key frames that need to be recovered on inter frames int kf_bitrate_adjustment; // Current number of bit s to try and recover on each inter frame. int max_gf_interval; + int static_scene_max_gf_interval; int baseline_gf_interval; int gf_decay_rate; int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames @@ -447,6 +452,10 @@ typedef struct int best_quality; int active_best_quality; + int cq_target_quality; + int maxq_max_limit; + int maxq_min_limit; + int drop_frames_allowed; // Are we permitted to drop frames? int drop_frame; // Drop this frame? int drop_count; // How many frames have we dropped? diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 8dfca351c..2b0f57508 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -608,8 +608,10 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts)); } - //Only consider ZEROMV/ALTREF_FRAME for alt ref frame. 
- if (cpi->is_src_frame_alt_ref) + // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, + // unless ARNR filtering is enabled in which case we want + // an unfiltered alternative + if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) continue; diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index a67299487..be9f26c7f 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -70,7 +70,6 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) { int i, rc, eob; - int zbin; int x, y, z, sz; short *coeff_ptr = b->coeff; short *round_ptr = b->round; diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 8455b7bdb..b69a1965e 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -1550,12 +1550,21 @@ void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; } } - // VBR + // VBR and CQ mode // Note that tighter restrictions here can help quality but hurt encode speed else { - *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; - *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; + // Stron overshoot limit for constrained quality + if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) + { + *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; + *frame_under_shoot_limit = cpi->this_frame_target * 2 / 8; + } + else + { + *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; + *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; + } } } } diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index e6c7c9ab3..b2a3e117f 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -36,7 +36,6 @@ #include "dct.h" #include "systemdependent.h" -#define DIAMONDSEARCH 1 #if CONFIG_RUNTIME_CPU_DETECT #define IF_RTCD(x) (x) #else @@ -46,19 +45,6 @@ void 
vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); - -#define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) -/*int RDFUNC( int RM,int DM, int R, int D, int target_r ) -{ - int rd_value; - - rd_value = ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ); - - return rd_value; -}*/ - -#define UVRDFUNC(RM,DM,R,D,target_r) RDFUNC(RM,DM,R,D,target_r) - #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) #define MAXF(a,b) (((a) > (b)) ? (a) : (b)) @@ -223,8 +209,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) { int q; int i; - int *thresh; - int threshmult; double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0; double rdconst = 3.00; @@ -271,22 +255,6 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) if (q < 8) q = 8; - if (cpi->ref_frame_flags == VP8_ALT_FLAG) - { - thresh = &cpi->rd_threshes[THR_NEWA]; - threshmult = cpi->sf.thresh_mult[THR_NEWA]; - } - else if (cpi->ref_frame_flags == VP8_GOLD_FLAG) - { - thresh = &cpi->rd_threshes[THR_NEWG]; - threshmult = cpi->sf.thresh_mult[THR_NEWG]; - } - else - { - thresh = &cpi->rd_threshes[THR_NEWMV]; - threshmult = cpi->sf.thresh_mult[THR_NEWMV]; - } - if (cpi->RDMULT > 1000) { cpi->RDDIV = 1; @@ -775,7 +743,7 @@ static int vp8_rd_inter_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distort *rate = rd_cost_mbuv(x); *distortion = ENCODEMB_INVOKE(&cpi->rtcd.encodemb, mbuverr)(x) / 4; - return UVRDFUNC(x->rdmult, x->rddiv, *rate, *distortion, cpi->target_bits_per_mb); + return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion) @@ -800,7 +768,7 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra distortion = vp8_get_mbuvrecon_error(IF_RTCD(&cpi->rtcd.variance), x); - this_rd = UVRDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb); + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { @@ -1097,7 
+1065,7 @@ void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, // Segmentation method overheads rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation); rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts); - this_segment_rd += RDFUNC(x->rdmult, x->rddiv, rate, 0, cpi->target_bits_per_mb); + this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); br += rate; for (i = 0; i < label_count; i++) @@ -1252,7 +1220,7 @@ void vp8_rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s); rate += labelyrate; - this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb); + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_label_rd) { @@ -1357,10 +1325,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, if (bsi.segment_rd < best_rd) { - int col_min = (best_ref_mv->col - MAX_POSSIBLE_MV) >>3; - int col_max = (best_ref_mv->col + MAX_POSSIBLE_MV) >>3; - int row_min = (best_ref_mv->row - MAX_POSSIBLE_MV) >>3; - int row_max = (best_ref_mv->row + MAX_POSSIBLE_MV) >>3; + int col_min = (best_ref_mv->col - MAX_FULL_PEL_VAL) >>3; + int col_max = (best_ref_mv->col + MAX_FULL_PEL_VAL) >>3; + int row_min = (best_ref_mv->row - MAX_FULL_PEL_VAL) >>3; + int row_max = (best_ref_mv->row + MAX_FULL_PEL_VAL) >>3; int tmp_col_min = x->mv_col_min; int tmp_col_max = x->mv_col_max; @@ -1407,7 +1375,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, } /* If 8x8 is better than 16x8/8x16, then do 4x4 search */ - if (bsi.segment_num == BLOCK_8X8) /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */ + /* Not skip 4x4 if speed=0 (good quality) */ + if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8) /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */ { bsi.mvp = &bsi.sv_mvp[0]; vp8_rd_check_segment(cpi, x, &bsi, BLOCK_4X4); @@ -1751,7 
+1720,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int //int intermodecost[MAX_MODES]; MB_PREDICTION_MODE uv_intra_mode; - int uvintra_eob = 0; + int force_no_skip = 0; MV mvp; @@ -1760,36 +1729,65 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int int saddone=0; int sr=0; //search range got from mv_pred(). It uses step_param levels. (0-7) - *returnintra = INT_MAX; + MV frame_nearest_mv[4]; + MV frame_near_mv[4]; + MV frame_best_ref_mv[4]; + int frame_mdcounts[4][4]; + int frame_lf_or_gf[4]; + unsigned char *y_buffer[4]; + unsigned char *u_buffer[4]; + unsigned char *v_buffer[4]; - vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // clean + vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); - cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame + if (cpi->ref_frame_flags & VP8_LAST_FLAG) + { + YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; - x->skip = 0; + vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[LAST_FRAME], &frame_near_mv[LAST_FRAME], + &frame_best_ref_mv[LAST_FRAME], frame_mdcounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias); - ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded); + y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset; + u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset; + v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset; + + frame_lf_or_gf[LAST_FRAME] = 0; + } + + if (cpi->ref_frame_flags & VP8_GOLD_FLAG) + { + YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx]; + + vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[GOLDEN_FRAME], &frame_near_mv[GOLDEN_FRAME], + &frame_best_ref_mv[GOLDEN_FRAME], frame_mdcounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias); + + y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset; + u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + 
recon_uvoffset; + v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset; + + frame_lf_or_gf[GOLDEN_FRAME] = 1; + } - // Experimental code - // Adjust the RD multiplier based on the best case distortion we saw in the most recently coded mb - //if ( (cpi->last_mb_distortion) > 0 && (cpi->target_bits_per_mb > 0) ) - /*{ - int tmprdmult; + if (cpi->ref_frame_flags & VP8_ALT_FLAG) + { + YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx]; + + vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &frame_nearest_mv[ALTREF_FRAME], &frame_near_mv[ALTREF_FRAME], + &frame_best_ref_mv[ALTREF_FRAME], frame_mdcounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias); - //tmprdmult = (cpi->last_mb_distortion * 256) / ((cpi->av_per_frame_bandwidth*256)/cpi->common.MBs); - tmprdmult = (cpi->last_mb_distortion * 256) / cpi->target_bits_per_mb; - //tmprdmult = tmprdmult; + y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset; + u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset; + v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset; - //if ( tmprdmult > cpi->RDMULT * 2 ) - // tmprdmult = cpi->RDMULT * 2; - //else if ( tmprdmult < cpi->RDMULT / 2 ) - // tmprdmult = cpi->RDMULT / 2; + frame_lf_or_gf[ALTREF_FRAME] = 1; + } - //tmprdmult = (tmprdmult < 25) ? 
25 : tmprdmult; + *returnintra = INT_MAX; + cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame - //x->rdmult = tmprdmult; + x->skip = 0; - }*/ + ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(cpi->prob_intra_coded); // Special case treatment when GF and ARF are not sensible options for reference if (cpi->ref_frame_flags == VP8_LAST_FLAG) @@ -1820,12 +1818,6 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion); uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; - { - uvintra_eob = 0; - - for (i = 16; i < 24; i++) - uvintra_eob += x->e_mbd.block[i].eob; - } for (mode_index = 0; mode_index < MAX_MODES; mode_index++) { @@ -1847,8 +1839,6 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int if (best_rd <= cpi->rd_threshes[mode_index]) continue; - - // These variables hold are rolling total cost and distortion for this mode rate2 = 0; distortion2 = 0; @@ -1859,65 +1849,28 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index]; - //Only consider ZEROMV/ALTREF_FRAME for alt ref frame. 
- if (cpi->is_src_frame_alt_ref) + // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, + // unless ARNR filtering is enabled in which case we want + // an unfiltered alternative + if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) continue; } - if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) + /* everything but intra */ + if (x->e_mbd.mode_info_context->mbmi.ref_frame) { - YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; - - if (!(cpi->ref_frame_flags & VP8_LAST_FLAG)) - continue; - - lf_or_gf = 0; // Local last frame vs Golden frame flag - - // Set up pointers for this macro block into the previous frame recon buffer - x->e_mbd.pre.y_buffer = lst_yv12->y_buffer + recon_yoffset; - x->e_mbd.pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset; - x->e_mbd.pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; - } - else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - { - YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx]; - - // not supposed to reference gold frame - if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG)) - continue; - - lf_or_gf = 1; // Local last frame vs Golden frame flag - - // Set up pointers for this macro block into the previous frame recon buffer - x->e_mbd.pre.y_buffer = gld_yv12->y_buffer + recon_yoffset; - x->e_mbd.pre.u_buffer = gld_yv12->u_buffer + recon_uvoffset; - x->e_mbd.pre.v_buffer = gld_yv12->v_buffer + recon_uvoffset; - } - else if (x->e_mbd.mode_info_context->mbmi.ref_frame == ALTREF_FRAME) - { - YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx]; - - // not supposed to reference alt ref frame - if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) - continue; - - //if ( !cpi->source_alt_ref_active ) - // continue; - - lf_or_gf = 1; // Local last frame vs Golden frame flag - - // Set up pointers for this macro block into the previous frame 
recon buffer - x->e_mbd.pre.y_buffer = alt_yv12->y_buffer + recon_yoffset; - x->e_mbd.pre.u_buffer = alt_yv12->u_buffer + recon_uvoffset; - x->e_mbd.pre.v_buffer = alt_yv12->v_buffer + recon_uvoffset; + x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; + x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; + x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; + mode_mv[NEARESTMV] = frame_nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + mode_mv[NEARMV] = frame_near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + best_ref_mv = frame_best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + vpx_memcpy(mdcounts, frame_mdcounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts)); + lf_or_gf = frame_lf_or_gf[x->e_mbd.mode_info_context->mbmi.ref_frame]; } - vp8_find_near_mvs(&x->e_mbd, - x->e_mbd.mode_info_context, - &mode_mv[NEARESTMV], &mode_mv[NEARMV], &best_ref_mv, - mdcounts, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias); if(x->e_mbd.mode_info_context->mbmi.mode == NEWMV) { @@ -1986,14 +1939,14 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]); /* adjust mvp to make sure it is within MV range */ - if(mvp.row > best_ref_mv.row + MAX_POSSIBLE_MV) - mvp.row = best_ref_mv.row + MAX_POSSIBLE_MV; - else if(mvp.row < best_ref_mv.row - MAX_POSSIBLE_MV) - mvp.row = best_ref_mv.row - MAX_POSSIBLE_MV; - if(mvp.col > best_ref_mv.col + MAX_POSSIBLE_MV) - mvp.col = best_ref_mv.col + MAX_POSSIBLE_MV; - else if(mvp.col < best_ref_mv.col - MAX_POSSIBLE_MV) - mvp.col = best_ref_mv.col - MAX_POSSIBLE_MV; + if(mvp.row > best_ref_mv.row + MAX_FULL_PEL_VAL) + mvp.row = best_ref_mv.row + MAX_FULL_PEL_VAL; + else if(mvp.row < best_ref_mv.row - MAX_FULL_PEL_VAL) + mvp.row = best_ref_mv.row - MAX_FULL_PEL_VAL; + if(mvp.col > 
best_ref_mv.col + MAX_FULL_PEL_VAL) + mvp.col = best_ref_mv.col + MAX_FULL_PEL_VAL; + else if(mvp.col < best_ref_mv.col - MAX_FULL_PEL_VAL) + mvp.col = best_ref_mv.col - MAX_FULL_PEL_VAL; } // Check to see if the testing frequency for this mode is at its max @@ -2125,10 +2078,10 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int int further_steps; int n; - int col_min = (best_ref_mv.col - MAX_POSSIBLE_MV) >>3; - int col_max = (best_ref_mv.col + MAX_POSSIBLE_MV) >>3; - int row_min = (best_ref_mv.row - MAX_POSSIBLE_MV) >>3; - int row_max = (best_ref_mv.row + MAX_POSSIBLE_MV) >>3; + int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3; + int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3; + int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3; + int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3; int tmp_col_min = x->mv_col_min; int tmp_col_max = x->mv_col_max; @@ -2339,8 +2292,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int distortion_uv = sse2; disable_skip = 1; - this_rd = RDFUNC(x->rdmult, x->rddiv, rate2, - distortion2, cpi->target_bits_per_mb); + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, + distortion2); break; } @@ -2414,7 +2367,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } } // Calculate the final RD estimate for this mode - this_rd = RDFUNC(x->rdmult, x->rddiv, rate2, distortion2, cpi->target_bits_per_mb); + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); } // Experimental debug code. 
@@ -2442,8 +2395,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int other_cost += ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; /* Calculate the final y RD estimate for this mode */ - best_yrd = RDFUNC(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost), - (distortion2-distortion_uv), cpi->target_bits_per_mb); + best_yrd = RDCOST(x->rdmult, x->rddiv, (rate2-rate_uv-other_cost), + (distortion2-distortion_uv)); *returnrate = rate2; *returndistortion = distortion2; diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index 2fffaa95f..f28daaff9 100644 --- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -36,36 +36,9 @@ #define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering -#define USE_FILTER_LUT 0 // use lookup table to improve filter #if VP8_TEMPORAL_ALT_REF -#if USE_FILTER_LUT -// for (strength = 0; strength <= 6; strength++) { -// for (delta = 0; delta <= 18; delta++) { -// float coeff = (3.0 * delta * delta) / pow(2, strength); -// printf("%3d", (int)roundf(coeff > 16 ? 
0 : 16-coeff)); -// } -// printf("\n"); -// } -static int modifier_lut[7][19] = -{ - // Strength=0 - {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=1 - {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=2 - {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=3 - {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=4 - {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - // Strength=5 - {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0}, - // Strength=6 - {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1} -}; -#endif static void vp8_temporal_filter_predictors_mb_c ( MACROBLOCKD *x, @@ -86,14 +59,11 @@ static void vp8_temporal_filter_predictors_mb_c if ((mv_row | mv_col) & 7) { -// vp8_sixtap_predict16x16_c(yptr, stride, -// mv_col & 7, mv_row & 7, &pred[0], 16); x->subpixel_predict16x16(yptr, stride, mv_col & 7, mv_row & 7, &pred[0], 16); } else { - //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16); RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16); } @@ -127,17 +97,13 @@ void vp8_temporal_filter_apply_c int strength, int filter_weight, unsigned int *accumulator, - unsigned int *count + unsigned short *count ) { int i, j, k; int modifier; int byte = 0; -#if USE_FILTER_LUT - int *lut = modifier_lut[strength]; -#endif - for (i = 0,k = 0; i < block_size; i++) { for (j = 0; j < block_size; j++, k++) @@ -146,11 +112,10 @@ void vp8_temporal_filter_apply_c int src_byte = frame1[byte]; int pixel_value = *frame2++; -#if USE_FILTER_LUT - modifier = abs(src_byte-pixel_value); - modifier = modifier>18 ? 0 : lut[modifier]; -#else modifier = src_byte - pixel_value; + // This is an integer approximation of: + // float coeff = (3.0 * modifer * modifier) / pow(2, strength); + // modifier = (int)roundf(coeff > 16 ? 
0 : 16-coeff); modifier *= modifier; modifier *= 3; modifier += 1 << (strength - 1); @@ -160,7 +125,6 @@ void vp8_temporal_filter_apply_c modifier = 16; modifier = 16 - modifier; -#endif modifier *= filter_weight; count[k] += modifier; @@ -326,17 +290,17 @@ static void vp8_temporal_filter_iterate_c int mb_col, mb_row; unsigned int filter_weight[MAX_LAG_BUFFERS]; unsigned char *mm_ptr = cpi->fp_motion_map; - int cols = cpi->common.mb_cols; - int rows = cpi->common.mb_rows; + int mb_cols = cpi->common.mb_cols; + int mb_rows = cpi->common.mb_rows; int MBs = cpi->common.MBs; int mb_y_offset = 0; int mb_uv_offset = 0; - unsigned int accumulator[384]; - unsigned int count[384]; + DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16*16 + 8*8 + 8*8); + DECLARE_ALIGNED_ARRAY(16, unsigned short, count, 16*16 + 8*8 + 8*8); MACROBLOCKD *mbd = &cpi->mb.e_mbd; YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index]; unsigned char *dst1, *dst2; - DECLARE_ALIGNED(16, unsigned char, predictor[384]); + DECLARE_ALIGNED_ARRAY(16, unsigned char, predictor, 16*16 + 8*8 + 8*8); // Save input state unsigned char *y_buffer = mbd->pre.y_buffer; @@ -350,7 +314,7 @@ static void vp8_temporal_filter_iterate_c filter_weight[frame] = 1; } - for (mb_row = 0; mb_row < rows; mb_row++) + for (mb_row = 0; mb_row < mb_rows; mb_row++) { #if ALT_REF_MC_ENABLED // Reduced search extent by 3 for 6-tap filter & smaller UMV border @@ -359,14 +323,14 @@ static void vp8_temporal_filter_iterate_c + (VP8BORDERINPIXELS - 19); #endif - for (mb_col = 0; mb_col < cols; mb_col++) + for (mb_col = 0; mb_col < mb_cols; mb_col++) { int i, j, k, w; int weight_cap; int stride; vpx_memset(accumulator, 0, 384*sizeof(unsigned int)); - vpx_memset(count, 0, 384*sizeof(unsigned int)); + vpx_memset(count, 0, 384*sizeof(unsigned short)); #if ALT_REF_MC_ENABLED // Reduced search extent by 3 for 6-tap filter & smaller UMV border @@ -533,8 +497,8 @@ static void vp8_temporal_filter_iterate_c mb_uv_offset += 8; } - mb_y_offset += 
16*f->y_stride-f->y_width; - mb_uv_offset += 8*f->uv_stride-f->uv_width; + mb_y_offset += 16*(f->y_stride-mb_cols); + mb_uv_offset += 8*(f->uv_stride-mb_cols); } // Restore input state diff --git a/vp8/encoder/temporal_filter.h b/vp8/encoder/temporal_filter.h index 7b8c21c04..740037a85 100644 --- a/vp8/encoder/temporal_filter.h +++ b/vp8/encoder/temporal_filter.h @@ -22,9 +22,13 @@ int strength, \ int filter_weight, \ unsigned int *accumulator, \ - unsigned int *count \ + unsigned short *count \ ) +#if ARCH_X86 || ARCH_X86_64 +#include "x86/temporal_filter_x86.h" +#endif + #ifndef vp8_temporal_filter_apply #define vp8_temporal_filter_apply vp8_temporal_filter_apply_c #endif diff --git a/vp8/encoder/x86/temporal_filter_apply_sse2.asm b/vp8/encoder/x86/temporal_filter_apply_sse2.asm new file mode 100644 index 000000000..f2adcccba --- /dev/null +++ b/vp8/encoder/x86/temporal_filter_apply_sse2.asm @@ -0,0 +1,207 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + +%include "vpx_ports/x86_abi_support.asm" + +; void vp8_temporal_filter_apply_sse2 | arg +; (unsigned char *frame1, | 0 +; unsigned int stride, | 1 +; unsigned char *frame2, | 2 +; unsigned int block_size, | 3 +; int strength, | 4 +; int filter_weight, | 5 +; unsigned int *accumulator, | 6 +; unsigned short *count) | 7 +global sym(vp8_temporal_filter_apply_sse2) +sym(vp8_temporal_filter_apply_sse2): + + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 8 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ALIGN_STACK 16, rax + %define block_size 0 + %define strength 16 + %define filter_weight 32 + %define rounding_bit 48 + %define rbp_backup 64 + %define stack_size 80 + sub rsp, stack_size + mov [rsp + rbp_backup], rbp + ; end prolog + + mov rdx, arg(3) + mov [rsp + block_size], rdx + movd xmm6, arg(4) + movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read + + ; calculate the rounding bit outside the loop + ; 0x8000 >> (16 - strength) + mov rdx, 16 + sub rdx, arg(4) ; 16 - strength + movd xmm4, rdx ; can't use rdx w/ shift + movdqa xmm5, [GLOBAL(_const_top_bit)] + psrlw xmm5, xmm4 + movdqa [rsp + rounding_bit], xmm5 + + mov rsi, arg(0) ; src/frame1 + mov rdx, arg(2) ; predictor frame + mov rdi, arg(6) ; accumulator + mov rax, arg(7) ; count + + ; dup the filter weight and store for later + movd xmm0, arg(5) ; filter_weight + pshuflw xmm0, xmm0, 0 + punpcklwd xmm0, xmm0 + movdqa [rsp + filter_weight], xmm0 + + mov rbp, arg(1) ; stride + pxor xmm7, xmm7 ; zero for extraction + + lea rcx, [rdx + 16*16*1] + cmp dword ptr [rsp + block_size], 8 + jne temporal_filter_apply_load_16 + lea rcx, [rdx + 8*8*1] + +temporal_filter_apply_load_8: + movq xmm0, [rsi] ; first row + lea rsi, [rsi + rbp] ; += stride + punpcklbw xmm0, xmm7 ; src[ 0- 7] + movq xmm1, [rsi] ; second row + lea rsi, [rsi + rbp] ; += stride + punpcklbw xmm1, xmm7 ; src[ 8-15] + jmp temporal_filter_apply_load_finished + +temporal_filter_apply_load_16: + movdqu xmm0, [rsi] ; src 
(frame1) + lea rsi, [rsi + rbp] ; += stride + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm7 ; src[ 0- 7] + punpckhbw xmm1, xmm7 ; src[ 8-15] + +temporal_filter_apply_load_finished: + movdqa xmm2, [rdx] ; predictor (frame2) + movdqa xmm3, xmm2 + punpcklbw xmm2, xmm7 ; pred[ 0- 7] + punpckhbw xmm3, xmm7 ; pred[ 8-15] + + ; modifier = src_byte - pixel_value + psubw xmm0, xmm2 ; src - pred[ 0- 7] + psubw xmm1, xmm3 ; src - pred[ 8-15] + + ; modifier *= modifier + pmullw xmm0, xmm0 ; modifer[ 0- 7]^2 + pmullw xmm1, xmm1 ; modifer[ 8-15]^2 + + ; modifier *= 3 + pmullw xmm0, [GLOBAL(_const_3w)] + pmullw xmm1, [GLOBAL(_const_3w)] + + ; modifer += 0x8000 >> (16 - strength) + paddw xmm0, [rsp + rounding_bit] + paddw xmm1, [rsp + rounding_bit] + + ; modifier >>= strength + psrlw xmm0, [rsp + strength] + psrlw xmm1, [rsp + strength] + + ; modifier = 16 - modifier + ; saturation takes care of modifier > 16 + movdqa xmm3, [GLOBAL(_const_16w)] + movdqa xmm2, [GLOBAL(_const_16w)] + psubusw xmm3, xmm1 + psubusw xmm2, xmm0 + + ; modifier *= filter_weight + pmullw xmm2, [rsp + filter_weight] + pmullw xmm3, [rsp + filter_weight] + + ; count + movdqa xmm4, [rax] + movdqa xmm5, [rax+16] + ; += modifier + paddw xmm4, xmm2 + paddw xmm5, xmm3 + ; write back + movdqa [rax], xmm4 + movdqa [rax+16], xmm5 + lea rax, [rax + 16*2] ; count += 16*(sizeof(short)) + + ; load and extract the predictor up to shorts + pxor xmm7, xmm7 + movdqa xmm0, [rdx] + lea rdx, [rdx + 16*1] ; pred += 16*(sizeof(char)) + movdqa xmm1, xmm0 + punpcklbw xmm0, xmm7 ; pred[ 0- 7] + punpckhbw xmm1, xmm7 ; pred[ 8-15] + + ; modifier *= pixel_value + pmullw xmm0, xmm2 + pmullw xmm1, xmm3 + + ; expand to double words + movdqa xmm2, xmm0 + punpcklwd xmm0, xmm7 ; [ 0- 3] + punpckhwd xmm2, xmm7 ; [ 4- 7] + movdqa xmm3, xmm1 + punpcklwd xmm1, xmm7 ; [ 8-11] + punpckhwd xmm3, xmm7 ; [12-15] + + ; accumulator + movdqa xmm4, [rdi] + movdqa xmm5, [rdi+16] + movdqa xmm6, [rdi+32] + movdqa xmm7, [rdi+48] + ; += modifier + paddw xmm4, xmm0 
+ paddw xmm5, xmm2 + paddw xmm6, xmm1 + paddw xmm7, xmm3 + ; write back + movdqa [rdi], xmm4 + movdqa [rdi+16], xmm5 + movdqa [rdi+32], xmm6 + movdqa [rdi+48], xmm7 + lea rdi, [rdi + 16*4] ; accumulator += 16*(sizeof(int)) + + cmp rdx, rcx + je temporal_filter_apply_epilog + pxor xmm7, xmm7 ; zero for extraction + cmp dword ptr [rsp + block_size], 16 + je temporal_filter_apply_load_16 + jmp temporal_filter_apply_load_8 + +temporal_filter_apply_epilog: + ; begin epilog + mov rbp, [rsp + rbp_backup] + add rsp, stack_size + pop rsp + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +_const_3w: + times 8 dw 3 +align 16 +_const_top_bit: + times 8 dw 1<<15 +align 16 +_const_16w + times 8 dw 16 diff --git a/vp8/encoder/x86/temporal_filter_x86.h b/vp8/encoder/x86/temporal_filter_x86.h new file mode 100644 index 000000000..2daa14018 --- /dev/null +++ b/vp8/encoder/x86/temporal_filter_x86.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef __INC_VP8_TEMPORAL_FILTER_X86_H +#define __INC_VP8_TEMPORAL_FILTER_X86_H + +#if HAVE_SSE2 +extern prototype_apply(vp8_temporal_filter_apply_sse2); + +#if !CONFIG_RUNTIME_CPU_DETECT + +#undef vp8_temporal_filter_apply +#define vp8_temporal_filter_apply vp8_temporal_filter_apply_sse2 + +#endif + +#endif + +#endif // __INC_VP8_TEMPORAL_FILTER_X86_H diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 6e317e2a2..f9b3ea1d8 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -309,6 +309,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/ cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2; + + cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; } #endif @@ -321,8 +323,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3; cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3; cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3; +#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.search.full_search = vp8_full_search_sadx3; - +#endif cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3; cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3; cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3; @@ -351,7 +354,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4; cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4; cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4; +#if !(CONFIG_REALTIME_ONLY) cpi->rtcd.search.full_search = vp8_full_search_sadx8; +#endif } #endif diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index f95920775..903c56c88 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -38,6 +38,7 @@ struct vp8_extracfg unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */ unsigned int arnr_type; /* alt_ref filter type */ vp8e_tuning tuning; + unsigned int cq_level; /* constrained quality level */ }; @@ 
-69,6 +70,7 @@ static const struct extraconfig_map extracfg_map[] = 3, /* arnr_strength */ 3, /* arnr_type*/ 0, /* tuning*/ + 10, /* cq_level */ } } }; @@ -148,7 +150,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, #else RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); #endif - RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CBR); + RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CQ); RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); @@ -190,7 +192,9 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); RANGE_CHECK(vp8_cfg, arnr_type, 1, 3); + RANGE_CHECK(vp8_cfg, cq_level, 0, 63); +#if !(CONFIG_REALTIME_ONLY) if (cfg->g_pass == VPX_RC_LAST_PASS) { int mb_r = (cfg->g_h + 15) / 16; @@ -214,6 +218,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, if ((int)(stats->count + 0.5) != n_packets - 1) ERROR("rc_twopass_stats_in missing EOS stats packet"); } +#endif return VPX_CODEC_OK; } @@ -298,11 +303,16 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, { oxcf->end_usage = USAGE_STREAM_FROM_SERVER; } + else if (cfg.rc_end_usage == VPX_CQ) + { + oxcf->end_usage = USAGE_CONSTRAINED_QUALITY; + } oxcf->target_bandwidth = cfg.rc_target_bitrate; oxcf->best_allowed_q = cfg.rc_min_quantizer; oxcf->worst_allowed_q = cfg.rc_max_quantizer; + oxcf->cq_level = vp8_cfg.cq_level; oxcf->fixed_q = -1; oxcf->under_shoot_pct = cfg.rc_undershoot_pct; @@ -453,6 +463,7 @@ static vpx_codec_err_t set_param(vpx_codec_alg_priv_t *ctx, MAP(VP8E_SET_ARNR_STRENGTH , xcfg.arnr_strength); MAP(VP8E_SET_ARNR_TYPE , xcfg.arnr_type); MAP(VP8E_SET_TUNING, xcfg.tuning); + MAP(VP8E_SET_CQ_LEVEL, xcfg.cq_level); } @@ -1034,6 +1045,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = {VP8E_SET_ARNR_STRENGTH , set_param}, {VP8E_SET_ARNR_TYPE , set_param}, {VP8E_SET_TUNING, 
set_param}, + {VP8E_SET_CQ_LEVEL, set_param}, { -1, NULL}, }; @@ -1069,7 +1081,6 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = 4, /* rc_min_quantizer */ 63, /* rc_max_quantizer */ - 95, /* rc_undershoot_pct */ 200, /* rc_overshoot_pct */ diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 683d785e6..932f145e6 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -94,6 +94,7 @@ VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h +VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm @@ -107,6 +108,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm |