From a3c1a9b4193f199d67954d70ba83e11ea87a99f1 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Fri, 19 Dec 2014 16:58:05 +0000
Subject: Use 64 bit to accumulate frame sse.

When testing frame sse to choose a loop filter value and
when checking ambient error in kf Q selection, use 64 bit
values for accumulating the sse, to avoid risk of overflow
for large image formats.

Change-Id: I03765d16c843d0ade61a45b0cd46312472697e57
---
 vp9/encoder/vp9_encoder.c | 54 ++++++++++++++++-------------------------------
 vp9/encoder/vp9_encoder.h |  9 ++++----
 vp9/encoder/vp9_picklpf.c | 16 ++++++++------
 3 files changed, 31 insertions(+), 48 deletions(-)

diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index b7bfddff1..8b932c468 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2549,7 +2549,7 @@ static void full_to_model_counts(vp9_coeff_count_model *model_count,
 static void output_frame_level_debug_stats(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
-  int recon_err;
+  int64_t recon_err;
 
   vp9_clear_system_state();
 
@@ -2561,7 +2561,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
         "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
         "%6d %6d %5d %5d %5d "
         "%10"PRId64" %10.3lf"
-        "%10lf %8u %10d %10d %10d\n",
+        "%10lf %8u %10"PRId64" %10d %10d\n",
         cpi->common.current_video_frame, cpi->rc.this_frame_target,
         cpi->rc.projected_frame_size,
         cpi->rc.projected_frame_size / cpi->common.MBs,
@@ -2890,15 +2890,14 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
            rc->this_key_frame_forced &&
            (rc->projected_frame_size < rc->max_frame_bandwidth)) {
         int last_q = q;
-        int kf_err;
+        int64_t kf_err;
 
-        int high_err_target = cpi->ambient_err;
-        int low_err_target = cpi->ambient_err >> 1;
+        int64_t high_err_target = cpi->ambient_err;
+        int64_t low_err_target = cpi->ambient_err >> 1;
 
 #if CONFIG_VP9_HIGHBITDEPTH
         if (cm->use_highbitdepth) {
-          kf_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm),
-                                        cm->bit_depth);
+          kf_err = vp9_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
         } else {
           kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
         }
@@ -2919,7 +2918,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
           q_high = q > q_low ? q - 1 : q_low;
 
           // Adjust Q
-          q = (q * high_err_target) / kf_err;
+          q = (int)((q * high_err_target) / kf_err);
           q = MIN(q, (q_high + q_low) >> 1);
         } else if (kf_err < low_err_target &&
                    rc->projected_frame_size >= frame_under_shoot_limit) {
@@ -2928,7 +2927,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
           q_low = q < q_high ? q + 1 : q_high;
 
           // Adjust Q
-          q = (q * low_err_target) / kf_err;
+          q = (int)((q * low_err_target) / kf_err);
           q = MIN(q, (q_high + q_low + 1) >> 1);
         }
 
@@ -3256,8 +3255,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
 #if CONFIG_VP9_HIGHBITDEPTH
     if (cm->use_highbitdepth) {
       cpi->ambient_err = vp9_highbd_get_y_sse(cpi->Source,
-                                              get_frame_new_buffer(cm),
-                                              cm->bit_depth);
+                                              get_frame_new_buffer(cm));
     } else {
       cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
     }
@@ -4026,41 +4024,25 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
   return;
 }
 
-int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) {
+int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a,
+                      const YV12_BUFFER_CONFIG *b) {
   assert(a->y_crop_width == b->y_crop_width);
   assert(a->y_crop_height == b->y_crop_height);
 
-  return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
-                      a->y_crop_width, a->y_crop_height);
+  return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
+                 a->y_crop_width, a->y_crop_height);
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-int vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
-                         const YV12_BUFFER_CONFIG *b,
-                         vpx_bit_depth_t bit_depth) {
-  unsigned int sse;
-  int sum;
+int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
+                             const YV12_BUFFER_CONFIG *b) {
   assert(a->y_crop_width == b->y_crop_width);
   assert(a->y_crop_height == b->y_crop_height);
   assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
   assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0);
-  switch (bit_depth) {
-    case VPX_BITS_8:
-      highbd_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
-                      a->y_crop_width, a->y_crop_height, &sse, &sum);
-      return (int) sse;
-    case VPX_BITS_10:
-      highbd_10_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
-                         a->y_crop_width, a->y_crop_height, &sse, &sum);
-      return (int) sse;
-    case VPX_BITS_12:
-      highbd_12_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
-                         a->y_crop_width, a->y_crop_height, &sse, &sum);
-      return (int) sse;
-    default:
-      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
-      return -1;
-  }
+
+  return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
+                        a->y_crop_width, a->y_crop_height);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 7872e2cc1..d256935f1 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -304,7 +304,7 @@ typedef struct VP9_COMP {
   unsigned int tok_count[4][1 << 6];
 
   // Ambient reconstruction err target for force key frames
-  int ambient_err;
+  int64_t ambient_err;
 
   RD_OPT rd;
 
@@ -534,11 +534,10 @@ static INLINE int allocated_tokens(TileInfo tile) {
   return get_token_alloc(tile_mb_rows, tile_mb_cols);
 }
 
-int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
+int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
 #if CONFIG_VP9_HIGHBITDEPTH
-int vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
-                         const YV12_BUFFER_CONFIG *b,
-                         vpx_bit_depth_t bit_depth);
+int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a,
+                             const YV12_BUFFER_CONFIG *b);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 void vp9_alloc_compressor_data(VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 81334e448..01cc519f9 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -33,16 +33,17 @@ static int get_max_filter_level(const VP9_COMP *cpi) {
 }
 
 
-static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
-                            int filt_level, int partial_frame) {
+static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
+                                VP9_COMP *const cpi,
+                                int filt_level, int partial_frame) {
   VP9_COMMON *const cm = &cpi->common;
-  int filt_err;
+  int64_t filt_err;
 
   vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1,
                         partial_frame);
 #if CONFIG_VP9_HIGHBITDEPTH
   if (cm->use_highbitdepth) {
-    filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show, cm->bit_depth);
+    filt_err = vp9_highbd_get_y_sse(sd, cm->frame_to_show);
   } else {
     filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
   }
@@ -63,14 +64,15 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
   const int min_filter_level = 0;
   const int max_filter_level = get_max_filter_level(cpi);
   int filt_direction = 0;
-  int best_err, filt_best;
+  int64_t best_err;
+  int filt_best;
 
   // Start the search at the previous frame filter level unless it is now out of
   // range.
   int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
   int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
   // Sum squared error at each filter level
-  int ss_err[MAX_LOOP_FILTER + 1];
+  int64_t ss_err[MAX_LOOP_FILTER + 1];
 
   // Set each entry to -1
   vpx_memset(ss_err, 0xFF, sizeof(ss_err));
@@ -87,7 +89,7 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
     const int filt_low = MAX(filt_mid - filter_step, min_filter_level);
 
     // Bias against raising loop filter in favor of lowering it.
-    int bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
+    int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
 
     if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
       bias = (bias * cpi->twopass.section_intra_rating) / 20;
-- 
cgit v1.2.3