From a3c1a9b4193f199d67954d70ba83e11ea87a99f1 Mon Sep 17 00:00:00 2001 From: Paul Wilkins Date: Fri, 19 Dec 2014 16:58:05 +0000 Subject: Use 64 bit to accumulate frame sse. When testing frame sse to choose a loop filter value and when checking ambient error in kf Q selection, use 64 bit values for accumulating the sse, to avoid risk of overflow for large image formats. Change-Id: I03765d16c843d0ade61a45b0cd46312472697e57 --- vp9/encoder/vp9_encoder.c | 54 ++++++++++++++++------------------------------- vp9/encoder/vp9_encoder.h | 9 ++++---- vp9/encoder/vp9_picklpf.c | 16 ++++++++------ 3 files changed, 31 insertions(+), 48 deletions(-) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index b7bfddff1..8b932c468 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2549,7 +2549,7 @@ static void full_to_model_counts(vp9_coeff_count_model *model_count, static void output_frame_level_debug_stats(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w"); - int recon_err; + int64_t recon_err; vp9_clear_system_state(); @@ -2561,7 +2561,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf" "%6d %6d %5d %5d %5d " "%10"PRId64" %10.3lf" - "%10lf %8u %10d %10d %10d\n", + "%10lf %8u %10"PRId64" %10d %10d\n", cpi->common.current_video_frame, cpi->rc.this_frame_target, cpi->rc.projected_frame_size, cpi->rc.projected_frame_size / cpi->common.MBs, @@ -2890,15 +2890,14 @@ static void encode_with_recode_loop(VP9_COMP *cpi, rc->this_key_frame_forced && (rc->projected_frame_size < rc->max_frame_bandwidth)) { int last_q = q; - int kf_err; + int64_t kf_err; - int high_err_target = cpi->ambient_err; - int low_err_target = cpi->ambient_err >> 1; + int64_t high_err_target = cpi->ambient_err; + int64_t low_err_target = cpi->ambient_err >> 1; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - kf_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm), - cm->bit_depth); + kf_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } else { kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } @@ -2919,7 +2918,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, q_high = q > q_low ? q - 1 : q_low; // Adjust Q - q = (q * high_err_target) / kf_err; + q = (int)((q * high_err_target) / kf_err); q = MIN(q, (q_high + q_low) >> 1); } else if (kf_err < low_err_target && rc->projected_frame_size >= frame_under_shoot_limit) { @@ -2928,7 +2927,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, q_low = q < q_high ? q + 1 : q_high; // Adjust Q - q = (q * low_err_target) / kf_err; + q = (int)((q * low_err_target) / kf_err); q = MIN(q, (q_high + q_low + 1) >> 1); } @@ -3256,8 +3255,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { cpi->ambient_err = vp9_highbd_get_y_sse(cpi->Source, - get_frame_new_buffer(cm), - cm->bit_depth); + get_frame_new_buffer(cm)); } else { cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } @@ -4026,41 +4024,25 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) { return; } -int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { +int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b) { assert(a->y_crop_width == b->y_crop_width); assert(a->y_crop_height == b->y_crop_height); - return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, - a->y_crop_width, a->y_crop_height); + return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height); } #if CONFIG_VP9_HIGHBITDEPTH -int vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b, - vpx_bit_depth_t bit_depth) { - unsigned int sse; - int sum; +int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b) { assert(a->y_crop_width == b->y_crop_width); assert(a->y_crop_height == b->y_crop_height); assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0); assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0); - switch (bit_depth) { - case VPX_BITS_8: - highbd_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, - a->y_crop_width, a->y_crop_height, &sse, &sum); - return (int) sse; - case VPX_BITS_10: - highbd_10_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, - a->y_crop_width, a->y_crop_height, &sse, &sum); - return (int) sse; - case VPX_BITS_12: - highbd_12_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, - a->y_crop_width, a->y_crop_height, &sse, &sum); - return (int) sse; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; - } + + return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height); } #endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 7872e2cc1..d256935f1 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -304,7 +304,7 @@ typedef struct VP9_COMP { unsigned int tok_count[4][1 << 6]; // Ambient reconstruction err target for force key frames - int ambient_err; + int64_t ambient_err; RD_OPT rd; @@ -534,11 +534,10 @@ static INLINE int allocated_tokens(TileInfo tile) { return get_token_alloc(tile_mb_rows, tile_mb_cols); } -int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); +int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); #if CONFIG_VP9_HIGHBITDEPTH -int vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b, - vpx_bit_depth_t bit_depth); +int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b); #endif // CONFIG_VP9_HIGHBITDEPTH void vp9_alloc_compressor_data(VP9_COMP *cpi); diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 81334e448..01cc519f9 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -33,16 +33,17 @@ static int get_max_filter_level(const VP9_COMP *cpi) { } -static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, - int filt_level, int partial_frame) { +static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, + VP9_COMP *const cpi, + int filt_level, int partial_frame) { VP9_COMMON *const cm = &cpi->common; - int filt_err; + int64_t filt_err; vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1, partial_frame); #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { - filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show, cm->bit_depth); + filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show); } else { filt_err = vp9_get_y_sse(sd, cm->frame_to_show); } @@ -63,14 +64,15 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, const int min_filter_level = 0; const int max_filter_level = get_max_filter_level(cpi); int filt_direction = 0; - int best_err, filt_best; + int64_t best_err; + int filt_best; // Start the search at the previous frame filter level unless it is now out of // range. int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level); int filter_step = filt_mid < 16 ? 4 : filt_mid / 4; // Sum squared error at each filter level - int ss_err[MAX_LOOP_FILTER + 1]; + int64_t ss_err[MAX_LOOP_FILTER + 1]; // Set each entry to -1 vpx_memset(ss_err, 0xFF, sizeof(ss_err)); @@ -87,7 +89,7 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, const int filt_low = MAX(filt_mid - filter_step, min_filter_level); // Bias against raising loop filter in favor of lowering it. - int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; + int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20)) bias = (bias * cpi->twopass.section_intra_rating) / 20; -- cgit v1.2.3