7 files changed, 85 insertions, 103 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 2ab4c7907..892c4a7ec 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -135,11 +135,6 @@ static void update_switchable_interp_probs(VP9_COMP *cpi, vp9_writer *w) {
     prob_diff_update(vp9_switchable_interp_tree,
                      cm->fc.switchable_interp_prob[j],
                      cm->counts.switchable_interp[j], SWITCHABLE_FILTERS, w);
-
-#ifdef MODE_STATS
-  if (!cpi->dummy_packing)
-    update_switchable_interp_stats(cm);
-#endif
 }
 
 static void pack_mb_tokens(vp9_writer* const w,
@@ -912,10 +907,6 @@ static void encode_txfm_probs(VP9_COMP *cpi, vp9_writer *w) {
         vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j],
                                   ct_32x32p[j]);
     }
-#ifdef MODE_STATS
-    if (!cpi->dummy_packing)
-      update_tx_count_stats(cm);
-#endif
   }
 }
 
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index ad9cc00b1..d48daceca 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -184,12 +184,6 @@ static INLINE int sp(int x) {
   return (x & 7) << 1;
 }
 
-#define IFMVCV(r, c, s, e)                                \
-    if (c >= minc && c <= maxc && r >= minr && r <= maxr) \
-      s                                                   \
-    else                                                  \
-      e;
-
 static INLINE uint8_t *pre(uint8_t *buf, int stride, int r, int c, int offset) {
   return &buf[(r >> 3) * stride + (c >> 3) - offset];
 }
@@ -201,17 +195,18 @@ static INLINE uint8_t *pre(uint8_t *buf, int stride, int r, int c, int offset) {
 
 /* checks if (r, c) has better score than previous best */
 #define CHECK_BETTER(v, r, c) \
-    IFMVCV(r, c, {                                                       \
-      thismse = (DIST(r, c));                                            \
-      if ((v = MVC(r, c) + thismse) < besterr) {                         \
-        besterr = v;                                                     \
-        br = r;                                                          \
-        bc = c;                                                          \
-        *distortion = thismse;                                           \
-        *sse1 = sse;                                                     \
-      }                                                                  \
-    },                                                                   \
-    v = INT_MAX;)
+  if (c >= minc && c <= maxc && r >= minr && r <= maxr) {              \
+    thismse = (DIST(r, c));                                            \
+    if ((v = MVC(r, c) + thismse) < besterr) {                         \
+      besterr = v;                                                     \
+      br = r;                                                          \
+      bc = c;                                                          \
+      *distortion = thismse;                                           \
+      *sse1 = sse;                                                     \
+    }                                                                  \
+  } else {                                                             \
+    v = INT_MAX;                                                       \
+  }
 
 #define FIRST_LEVEL_CHECKS                              \
   {                                                     \
@@ -469,7 +464,6 @@ int vp9_find_best_sub_pixel_comp_tree(MACROBLOCK *x,
 #undef MVC
 #undef PRE
 #undef DIST
-#undef IFMVCV
 #undef CHECK_BETTER
 #undef SP
 
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 27531d232..38189edca 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -93,14 +93,6 @@ FILE *kf_list;
 FILE *keyfile;
 #endif
 
-
-#ifdef MODE_STATS
-extern void init_tx_count_stats();
-extern void write_tx_count_stats();
-extern void init_switchable_interp_stats();
-extern void write_switchable_interp_stats();
-#endif
-
 #ifdef SPEEDSTATS
 unsigned int frames_at_speed[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                     0, 0, 0};
@@ -851,6 +843,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
       sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
       sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
     }
+    sf->use_fast_lpf_pick = 2;
   }
 }
 
@@ -969,7 +962,7 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
   if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
                                cpi->oxcf.width, cpi->oxcf.height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+                               VP9_ENC_BORDER_IN_PIXELS))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate altref buffer");
 }
@@ -1037,14 +1030,14 @@ static void update_frame_size(VP9_COMP *cpi) {
   if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
                                cm->width, cm->height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+                               VP9_ENC_BORDER_IN_PIXELS))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to reallocate last frame buffer");
 
   if (vp9_realloc_frame_buffer(&cpi->scaled_source,
                                cm->width, cm->height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
+                               VP9_ENC_BORDER_IN_PIXELS))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to reallocate scaled source buffer");
 
@@ -1631,11 +1624,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
     init_context_counters();
 #endif
 
-#ifdef MODE_STATS
-  init_tx_count_stats();
-  init_switchable_interp_stats();
-#endif
-
   /*Initialize the feed-forward activity masking.*/
   cpi->activity_avg = 90 << 12;
   cpi->key_frame_frequency = cpi->oxcf.key_freq;
@@ -1892,13 +1880,6 @@ void vp9_remove_compressor(VP9_PTR *ptr) {
       vp9_end_second_pass(cpi);
     }
 
-#ifdef MODE_STATS
-    if (cpi->pass != 1) {
-      write_tx_count_stats();
-      write_switchable_interp_stats();
-    }
-#endif
-
 #if CONFIG_INTERNAL_STATS
 
     vp9_clear_system_state();
@@ -2589,7 +2570,7 @@ static void scale_references(VP9_COMP *cpi) {
       vp9_realloc_frame_buffer(&cm->yv12_fb[new_fb],
                                cm->width, cm->height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+                               VP9_ENC_BORDER_IN_PIXELS);
       scale_and_extend_frame(ref, &cm->yv12_fb[new_fb]);
       cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
     } else {
@@ -3580,8 +3561,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
   vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
                            cm->width, cm->height,
                            cm->subsampling_x, cm->subsampling_y,
-                           VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
-
+                           VP9_ENC_BORDER_IN_PIXELS);
 
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
     const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
@@ -3872,24 +3852,25 @@ void vp9_set_svc(VP9_PTR comp, int use_svc) {
   return;
 }
 
-int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) {
+int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
+                    const YV12_BUFFER_CONFIG *reference) {
   int i, j;
   int total = 0;
 
-  uint8_t *src = source->y_buffer;
-  uint8_t *dst = dest->y_buffer;
+  const uint8_t *src = source->y_buffer;
+  const uint8_t *ref = reference->y_buffer;
 
   // Loop through the Y plane raw and reconstruction data summing
   // (square differences)
   for (i = 0; i < source->y_height; i += 16) {
     for (j = 0; j < source->y_width; j += 16) {
       unsigned int sse;
-      total += vp9_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride,
-                            &sse);
+      total += vp9_mse16x16(src + j, source->y_stride,
+                            ref + j, reference->y_stride, &sse);
     }
 
     src += 16 * source->y_stride;
-    dst += 16 * dest->y_stride;
+    ref += 16 * reference->y_stride;
   }
 
   return total;
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 35f8fac69..5838c76d9 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -72,7 +72,6 @@ typedef struct {
   // 0 = ZERO_MV, MV
   signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
 
-  int inter_mode_counts[INTER_MODE_CONTEXTS][INTER_MODES - 1][2];
   FRAME_CONTEXT fc;
 } CODING_CONTEXT;
 
@@ -409,8 +408,10 @@ typedef struct {
   // final encode.
   int use_uv_intra_rd_estimate;
 
-  // This picks a loop filter strength by trying a small portion of the image
-  // with different values.
+  // This feature controls how the loop filter level is determined:
+  // 0: Try the full image with different values.
+  // 1: Try a small portion of the image with different values.
+  // 2: Estimate the level based on quantizer and frame type.
   int use_fast_lpf_pick;
 
   // This feature limits the number of coefficients updates we actually do
@@ -809,7 +810,8 @@ void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
 
 void vp9_set_speed_features(VP9_COMP *cpi);
 
-int vp9_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest);
+int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
+                    const YV12_BUFFER_CONFIG *reference);
 
 void vp9_alloc_compressor_data(VP9_COMP *cpi);
 
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index a4ceabdf1..9d73df2ec 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -14,6 +14,7 @@
 #include "vp9/encoder/vp9_onyx_int.h"
 #include "vp9/encoder/vp9_picklpf.h"
 #include "vp9/encoder/vp9_quantize.h"
+#include "vp9/common/vp9_quant_common.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_scale/vpx_scale.h"
 #include "vp9/common/vp9_alloccommon.h"
@@ -33,7 +34,8 @@ static int get_max_filter_level(VP9_COMP *cpi, int base_qindex) {
 void vp9_set_alt_lf_level(VP9_COMP *cpi, int filt_val) {
 }
 
-void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
+static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
+                                int partial) {
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   VP9_COMMON *const cm = &cpi->common;
   struct loopfilter *const lf = &cm->lf;
@@ -48,9 +50,6 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
   int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
   int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
 
-  lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0
-                                                    : cpi->oxcf.sharpness;
-
   //  Make a copy of the unfiltered / processed recon buffer
   vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
 
@@ -128,3 +127,27 @@ void vp9_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, int partial) {
 
   lf->filter_level = filt_best;
 }
+
+void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
+                           int method) {
+  VP9_COMMON *const cm = &cpi->common;
+  struct loopfilter *const lf = &cm->lf;
+
+  lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0
+                                                    : cpi->oxcf.sharpness;
+
+  if (method == 2) {
+    const int min_filter_level = get_min_filter_level(cpi, cm->base_qindex);
+    const int max_filter_level = get_max_filter_level(cpi, cm->base_qindex);
+    const int q = vp9_ac_quant(cm->base_qindex, 0);
+    // These constants come from a linear fit to the searched filter levels.
+    // The fixed-point expression below evaluates, with rounding to nearest,
+    // filt_guess = (q / 4) * 0.316206 + 3.87252
+    int filt_guess = (q * 20723 + 1015158 + (1 << 17)) >> 18;
+    if (cm->frame_type == KEY_FRAME)
+      filt_guess -= 4;
+    lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
+  } else {
+    search_filter_level(sd, cpi, method == 1);
+  }
+}
diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h
index f9cb14c27..0fc1f88b3 100644
--- a/vp9/encoder/vp9_picklpf.h
+++ b/vp9/encoder/vp9_picklpf.h
@@ -21,8 +21,8 @@ struct VP9_COMP;
 
 void vp9_set_alt_lf_level(struct VP9_COMP *cpi, int filt_val);
 
-void vp9_pick_filter_level(struct yv12_buffer_config *sd,
-                           struct VP9_COMP *cpi, int partial);
+void vp9_pick_filter_level(const struct yv12_buffer_config *sd,
+                           struct VP9_COMP *cpi, int method);
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 3ebf98c0f..3c816a3d0 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -468,8 +468,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
 
   // Work out a size correction factor.
   if (projected_size_based_on_q > 0)
-    correction_factor =
-        (100 * cpi->rc.projected_frame_size) / projected_size_based_on_q;
+    correction_factor = (100 * cpi->rc.projected_frame_size) /
+                            projected_size_based_on_q;
 
   // More heavily damped adjustment used if we have been oscillating either side
   // of target.
@@ -514,26 +514,25 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
 
 int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
                       int active_best_quality, int active_worst_quality) {
+  const VP9_COMMON *const cm = &cpi->common;
   int q = active_worst_quality;
   int last_error = INT_MAX;
-  int i, target_bits_per_mb, bits_per_mb_at_this_q;
+  int i, target_bits_per_mb;
   const double correction_factor = get_rate_correction_factor(cpi);
 
   // Calculate required scaling factor based on target frame size and size of
   // frame produced using previous Q.
   if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS))
-    target_bits_per_mb =
-        (target_bits_per_frame / cpi->common.MBs)
-        << BPER_MB_NORMBITS;  // Case where we would overflow int
+    // Case where we would overflow int
+    target_bits_per_mb = (target_bits_per_frame / cm->MBs) << BPER_MB_NORMBITS;
   else
-    target_bits_per_mb =
-        (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs;
+    target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
 
   i = active_best_quality;
 
   do {
-    bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cpi->common.frame_type, i,
-                                                    correction_factor);
+    const int bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cm->frame_type, i,
+                                                             correction_factor);
 
     if (bits_per_mb_at_this_q <= target_bits_per_mb) {
       if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
@@ -550,25 +549,19 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
   return q;
 }
 
-static int get_active_quality(int q,
-                              int gfu_boost,
-                              int low,
-                              int high,
-                              int *low_motion_minq,
-                              int *high_motion_minq) {
-  int active_best_quality;
+static int get_active_quality(int q, int gfu_boost, int low, int high,
+                              int *low_motion_minq, int *high_motion_minq) {
   if (gfu_boost > high) {
-    active_best_quality = low_motion_minq[q];
+    return low_motion_minq[q];
   } else if (gfu_boost < low) {
-    active_best_quality = high_motion_minq[q];
+    return high_motion_minq[q];
   } else {
     const int gap = high - low;
     const int offset = high - gfu_boost;
     const int qdiff = high_motion_minq[q] - low_motion_minq[q];
     const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap;
-    active_best_quality = low_motion_minq[q] + adjustment;
+    return low_motion_minq[q] + adjustment;
   }
-  return active_best_quality;
 }
 
 int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
@@ -615,8 +608,8 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
       // Convert the adjustment factor to a qindex delta
       // on active_best_quality.
       q_val = vp9_convert_qindex_to_q(active_best_quality);
-      active_best_quality +=
-          vp9_compute_qdelta(cpi, q_val, (q_val * q_adj_factor));
+      active_best_quality += vp9_compute_qdelta(cpi, q_val, q_val *
+                                                   q_adj_factor);
     }
 #else
     double current_q;
@@ -720,15 +713,12 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
   // Limit Q range for the adaptive loop.
   if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
-    if (!(cpi->pass == 0 && cm->current_video_frame == 0)) {
-      *top_index =
-          (active_worst_quality + active_best_quality * 3) / 4;
-    }
+    if (!(cpi->pass == 0 && cm->current_video_frame == 0))
+      *top_index = (active_worst_quality + active_best_quality * 3) / 4;
   } else if (!rc->is_src_frame_alt_ref &&
              (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-    *top_index =
-      (active_worst_quality + active_best_quality) / 2;
+    *top_index = (active_worst_quality + active_best_quality) / 2;
   }
 #endif
 
@@ -818,7 +808,8 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
 
 // return of 0 means drop frame
 int vp9_rc_pick_frame_size_target(VP9_COMP *cpi) {
-  VP9_COMMON *cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
 
   if (cm->frame_type == KEY_FRAME)
     calc_iframe_target_size(cpi);
@@ -826,12 +817,12 @@ int vp9_rc_pick_frame_size_target(VP9_COMP *cpi) {
     calc_pframe_target_size(cpi);
 
   // Clip the frame target to the maximum allowed value.
-  if (cpi->rc.this_frame_target > cpi->rc.max_frame_bandwidth)
-    cpi->rc.this_frame_target = cpi->rc.max_frame_bandwidth;
+  if (rc->this_frame_target > rc->max_frame_bandwidth)
+    rc->this_frame_target = rc->max_frame_bandwidth;
 
   // Target rate per SB64 (including partial SB64s.
-  cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) /
-                             (cpi->common.width * cpi->common.height);
+  rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) /
+                             (cm->width * cm->height);
   return 1;
 }