12 files changed, 366 insertions, 429 deletions
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 323c10350..d1437d377 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -136,7 +136,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
   const int xmis = MIN(cm->mi_cols - mi_col, bw);
   const int ymis = MIN(cm->mi_rows - mi_row, bh);
   const int block_index = mi_row * cm->mi_cols + mi_col;
-  const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd);
+  const int refresh_this_block = cpi->mb.in_static_area ||
+                                 candidate_refresh_aq(cr, mbmi, bsize, use_rd);
   // Default is to not update the refresh map.
   int new_map_value = cr->map[block_index];
   int x = 0; int y = 0;
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index cad124125..fcf2a0420 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -162,6 +162,8 @@ struct macroblock {
   // note that token_costs is the cost when eob node is skipped
   vp9_coeff_cost token_costs[TX_SIZES];
 
+  int in_static_area;
+
   int optimize;
 
   // indicate if it is in the rd search loop or encoding process
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f88ce2d1a..9956acc0b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1273,6 +1273,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
 
 static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
                          int mi_row, int mi_col) {
+  MACROBLOCK *x = &cpi->mb;
   uint8_t *src, *pre;
   int src_stride, pre_stride;
 
@@ -1304,7 +1305,8 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
     threshold = (row8x8_remaining * col8x8_remaining) << 6;
   }
 
-  return (this_sad < 2 * threshold);
+  x->in_static_area = (this_sad < 2 * threshold);
+  return x->in_static_area;
 }
 
 static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
@@ -2433,6 +2435,11 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
   set_offsets(cpi, tile, mi_row, mi_col, bsize);
   xd->mi[0]->mbmi.sb_type = bsize;
 
+  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
+    if (xd->mi[0]->mbmi.segment_id && x->in_static_area)
+      x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
+  }
+
   if (!frame_is_intra_only(cm)) {
     vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col,
                         rate, dist, bsize);
@@ -2856,7 +2863,8 @@ static void nonrd_use_partition(VP9_COMP *cpi,
 static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                 int mi_row, TOKENEXTRA **tp) {
   VP9_COMMON *cm = &cpi->common;
-  MACROBLOCKD *xd = &cpi->mb.e_mbd;
+  MACROBLOCK *x = &cpi->mb;
+  MACROBLOCKD *xd = &x->e_mbd;
   int mi_col;
 
   // Initialize the left context for the new SB row
@@ -2874,8 +2882,9 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
     BLOCK_SIZE bsize;
 
-    cpi->mb.source_variance = UINT_MAX;
-    vp9_zero(cpi->mb.pred_mv);
+    x->in_static_area = 0;
+    x->source_variance = UINT_MAX;
+    vp9_zero(x->pred_mv);
 
     // Set the partition type of the 64X64 block
     switch (cpi->sf.partition_search_type) {
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index f9dba9689..1dacfaaae 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -61,7 +61,7 @@
 #define MIN_GF_INTERVAL             4
 #endif
 
-#define DISABLE_RC_LONG_TERM_MEM
+#define LONG_TERM_VBR_CORRECTION
 
 static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
   YV12_BUFFER_CONFIG temp = *a;
@@ -1033,6 +1033,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
 
     reset_fpf_position(twopass, start_pos);
   }
+
+  // Reset the vbr bits off target counter
+  cpi->rc.vbr_bits_off_target = 0;
 }
 
 // This function gives an estimate of how badly we believe the prediction
@@ -2192,6 +2195,23 @@ void vp9_rc_get_first_pass_params(VP9_COMP *cpi) {
   cpi->rc.frames_to_key = INT_MAX;
 }
 
+// For VBR...adjustment to the frame target based on error from previous frames
+void vbr_rate_correction(int * this_frame_target,
+                         const int64_t vbr_bits_off_target) {
+  int max_delta = (*this_frame_target * 15) / 100;
+
+  // vbr_bits_off_target > 0 means we have extra bits to spend
+  if (vbr_bits_off_target > 0) {
+    *this_frame_target +=
+      (vbr_bits_off_target > max_delta) ? max_delta
+                                        : (int)vbr_bits_off_target;
+  } else {
+    *this_frame_target -=
+      (vbr_bits_off_target < -max_delta) ? max_delta
+                                         : (int)-vbr_bits_off_target;
+  }
+}
+
 void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
@@ -2219,8 +2239,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     return;
 
   if (cpi->refresh_alt_ref_frame) {
+    int modified_target = twopass->gf_bits;
+    rc->base_frame_target = twopass->gf_bits;
     cm->frame_type = INTER_FRAME;
-    vp9_rc_set_frame_target(cpi, twopass->gf_bits);
+#ifdef LONG_TERM_VBR_CORRECTION
+    // Correction to rate target based on prior over or under shoot.
+    if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
+      vbr_rate_correction(&modified_target, rc->vbr_bits_off_target);
+#endif
+    vp9_rc_set_frame_target(cpi, modified_target);
     return;
   }
 
@@ -2315,6 +2342,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     target = vp9_rc_clamp_iframe_target_size(cpi, rc->this_frame_target);
   else
     target = vp9_rc_clamp_pframe_target_size(cpi, rc->this_frame_target);
+
+  rc->base_frame_target = target;
+#ifdef LONG_TERM_VBR_CORRECTION
+  // Correction to rate target based on prior over or under shoot.
+  if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
+    vbr_rate_correction(&target, rc->vbr_bits_off_target);
+#endif
   vp9_rc_set_frame_target(cpi, target);
 
   // Update the total stats remaining structure.
@@ -2322,20 +2356,45 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
 }
 
 void vp9_twopass_postencode_update(VP9_COMP *cpi) {
-#ifdef DISABLE_RC_LONG_TERM_MEM
-  const uint64_t bits_used = cpi->rc.this_frame_target;
+  RATE_CONTROL *const rc = &cpi->rc;
+#ifdef LONG_TERM_VBR_CORRECTION
+  // In this experimental mode, the VBR correction is done exclusively through
+  // rc->vbr_bits_off_target. Based on the sign of this value, a limited %
+  // adjustment is made to the target rate of subsequent frames, to try and
+  // push it back towards 0. This mode is less likely to suffer from
+  // extreme behaviour at the end of a clip or group of frames.
+  const int bits_used = rc->base_frame_target;
+  rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size;
 #else
-  const uint64_t bits_used = cpi->rc.projected_frame_size;
+  // In this mode, VBR correction is acheived by altering bits_left,
+  // kf_group_bits & gf_group_bits to reflect any deviation from the target
+  // rate in this frame. This alters the allocation of bits to the
+  // remaning frames in the group / clip.
+  //
+  // This method can give rise to unstable behaviour near the end of a clip
+  // or kf/gf group of frames where any accumulated error is corrected over an
+  // ever decreasing number of frames. Hence we change the balance of target
+  // vs. actual bitrate gradually as we progress towards the end of the
+  // sequence in order to mitigate this effect.
+  const double progress =
+      (double)(cpi->twopass.stats_in - cpi->twopass.stats_in_start) /
+              (cpi->twopass.stats_in_end - cpi->twopass.stats_in_start);
+  const int bits_used = progress * cpi->rc.this_frame_target +
+                        (1.0 - progress) * cpi->rc.projected_frame_size;
 #endif
+
   cpi->twopass.bits_left -= bits_used;
   cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0);
-  // Update bits left to the kf and gf groups to account for overshoot or
-  // undershoot on these frames.
+
+#ifdef LONG_TERM_VBR_CORRECTION
+  if (cpi->common.frame_type != KEY_FRAME) {
+#else
   if (cpi->common.frame_type == KEY_FRAME) {
     // For key frames kf_group_bits already had the target bits subtracted out.
     // So now update to the correct value based on the actual bits used.
     cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used;
   } else {
+#endif
     cpi->twopass.kf_group_bits -= bits_used;
     cpi->twopass.gf_group_bits -= bits_used;
     cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 1aa250e8b..23a68729b 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -373,122 +373,6 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) {
   }
 }
 
-static void set_rd_speed_thresholds(VP9_COMP *cpi) {
-  int i;
-  RD_OPT *const rd = &cpi->rd;
-
-  // Set baseline threshold values
-  for (i = 0; i < MAX_MODES; ++i)
-    rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
-
-  rd->thresh_mult[THR_NEARESTMV] = 0;
-  rd->thresh_mult[THR_NEARESTG] = 0;
-  rd->thresh_mult[THR_NEARESTA] = 0;
-
-  rd->thresh_mult[THR_DC] += 1000;
-
-  rd->thresh_mult[THR_NEWMV] += 1000;
-  rd->thresh_mult[THR_NEWA] += 1000;
-  rd->thresh_mult[THR_NEWG] += 1000;
-
-  rd->thresh_mult[THR_NEARMV] += 1000;
-  rd->thresh_mult[THR_NEARA] += 1000;
-  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
-  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
-
-  rd->thresh_mult[THR_TM] += 1000;
-
-  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
-  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
-  rd->thresh_mult[THR_NEARG] += 1000;
-  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
-  rd->thresh_mult[THR_COMP_NEWGA] += 2000;
-
-  rd->thresh_mult[THR_ZEROMV] += 2000;
-  rd->thresh_mult[THR_ZEROG] += 2000;
-  rd->thresh_mult[THR_ZEROA] += 2000;
-  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
-  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
-
-  rd->thresh_mult[THR_H_PRED] += 2000;
-  rd->thresh_mult[THR_V_PRED] += 2000;
-  rd->thresh_mult[THR_D45_PRED ] += 2500;
-  rd->thresh_mult[THR_D135_PRED] += 2500;
-  rd->thresh_mult[THR_D117_PRED] += 2500;
-  rd->thresh_mult[THR_D153_PRED] += 2500;
-  rd->thresh_mult[THR_D207_PRED] += 2500;
-  rd->thresh_mult[THR_D63_PRED] += 2500;
-
-  /* disable frame modes if flags not set */
-  if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
-    rd->thresh_mult[THR_NEWMV    ] = INT_MAX;
-    rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
-    rd->thresh_mult[THR_ZEROMV   ] = INT_MAX;
-    rd->thresh_mult[THR_NEARMV   ] = INT_MAX;
-  }
-  if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
-    rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
-    rd->thresh_mult[THR_ZEROG    ] = INT_MAX;
-    rd->thresh_mult[THR_NEARG    ] = INT_MAX;
-    rd->thresh_mult[THR_NEWG     ] = INT_MAX;
-  }
-  if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
-    rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
-    rd->thresh_mult[THR_ZEROA    ] = INT_MAX;
-    rd->thresh_mult[THR_NEARA    ] = INT_MAX;
-    rd->thresh_mult[THR_NEWA     ] = INT_MAX;
-  }
-
-  if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
-      (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
-    rd->thresh_mult[THR_COMP_ZEROLA   ] = INT_MAX;
-    rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
-    rd->thresh_mult[THR_COMP_NEARLA   ] = INT_MAX;
-    rd->thresh_mult[THR_COMP_NEWLA    ] = INT_MAX;
-  }
-  if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
-      (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
-    rd->thresh_mult[THR_COMP_ZEROGA   ] = INT_MAX;
-    rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
-    rd->thresh_mult[THR_COMP_NEARGA   ] = INT_MAX;
-    rd->thresh_mult[THR_COMP_NEWGA    ] = INT_MAX;
-  }
-}
-
-static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  RD_OPT *const rd = &cpi->rd;
-  int i;
-
-  for (i = 0; i < MAX_REFS; ++i)
-    rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode)  ? -500 : 0;
-
-  rd->thresh_mult_sub8x8[THR_LAST] += 2500;
-  rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
-  rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
-  rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
-  rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
-  rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
-
-  // Check for masked out split cases.
-  for (i = 0; i < MAX_REFS; i++)
-    if (sf->disable_split_mask & (1 << i))
-      rd->thresh_mult_sub8x8[i] = INT_MAX;
-
-  // disable mode test if frame flag is not set
-  if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
-    rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
-  if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
-    rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
-  if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
-    rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
-  if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
-      (VP9_LAST_FLAG | VP9_ALT_FLAG))
-    rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
-  if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
-      (VP9_GOLD_FLAG | VP9_ALT_FLAG))
-    rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
-}
 
 static void set_speed_features(VP9_COMP *cpi) {
 #if CONFIG_INTERNAL_STATS
@@ -500,8 +384,8 @@ static void set_speed_features(VP9_COMP *cpi) {
   vp9_set_speed_features(cpi);
 
   // Set rd thresholds based on mode and speed setting
-  set_rd_speed_thresholds(cpi);
-  set_rd_speed_thresholds_sub8x8(cpi);
+  vp9_set_rd_speed_thresholds(cpi);
+  vp9_set_rd_speed_thresholds_sub8x8(cpi);
 
   cpi->mb.fwd_txm4x4 = vp9_fdct4x4;
   if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
@@ -567,7 +451,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
   vp9_setup_pc_tree(&cpi->common, &cpi->mb);
 }
 
-
 static void update_frame_size(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@@ -1860,8 +1743,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
   recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
 
   if (cpi->twopass.total_left_stats.coded_error != 0.0)
-    fprintf(f, "%10u %10d %10d %10d %10d %10d "
-        "%10"PRId64" %10"PRId64" %10d "
+    fprintf(f, "%10u %10d %10d %10d %10d"
+        "%10"PRId64" %10"PRId64" %10"PRId64" %10"PRId64" %10d "
         "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
         "%6d %6d %5d %5d %5d "
         "%10"PRId64" %10.3lf"
@@ -1870,6 +1753,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
         cpi->rc.projected_frame_size,
         cpi->rc.projected_frame_size / cpi->common.MBs,
         (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
+        cpi->rc.vbr_bits_off_target,
         cpi->rc.total_target_vs_actual,
         (cpi->oxcf.starting_buffer_level - cpi->rc.bits_off_target),
         cpi->rc.total_actual_bits, cm->base_qindex,
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index b63d0da5e..b8d0ec40d 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -85,7 +85,7 @@ void vp9_rc_init_minq_luts() {
     gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
     afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33);
     afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55);
-    inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.55);
+    inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75);
   }
 }
 
@@ -1033,6 +1033,7 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) {
   RATE_CONTROL *const rc = &cpi->rc;
 
   rc->this_frame_target = target;
+
   // Target rate per SB64 (including partial SB64s.
   rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) /
                              (cm->width * cm->height);
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index fbeca829f..820366119 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -27,7 +27,9 @@ extern "C" {
 
 typedef struct {
   // Rate targetting variables
-  int this_frame_target;
+  int base_frame_target;           // A baseline frame target before adjustment
+                                   // for previous under or over shoot.
+  int this_frame_target;           // Actual frame target after rc adjustment.
   int projected_frame_size;
   int sb64_target_rate;
   int last_q[3];                   // Separate values for Intra/Inter/ARF-GF
@@ -67,6 +69,7 @@ typedef struct {
 
   int64_t buffer_level;
   int64_t bits_off_target;
+  int64_t vbr_bits_off_target;
 
   int decimation_factor;
   int decimation_count;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 63c099d75..55ae721be 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -4374,3 +4374,120 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
   return best_rd;
 }
+
+void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
+  int i;
+  RD_OPT *const rd = &cpi->rd;
+
+  // Set baseline threshold values
+  for (i = 0; i < MAX_MODES; ++i)
+    rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
+
+  rd->thresh_mult[THR_NEARESTMV] = 0;
+  rd->thresh_mult[THR_NEARESTG] = 0;
+  rd->thresh_mult[THR_NEARESTA] = 0;
+
+  rd->thresh_mult[THR_DC] += 1000;
+
+  rd->thresh_mult[THR_NEWMV] += 1000;
+  rd->thresh_mult[THR_NEWA] += 1000;
+  rd->thresh_mult[THR_NEWG] += 1000;
+
+  rd->thresh_mult[THR_NEARMV] += 1000;
+  rd->thresh_mult[THR_NEARA] += 1000;
+  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
+  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
+
+  rd->thresh_mult[THR_TM] += 1000;
+
+  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
+  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
+  rd->thresh_mult[THR_NEARG] += 1000;
+  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
+  rd->thresh_mult[THR_COMP_NEWGA] += 2000;
+
+  rd->thresh_mult[THR_ZEROMV] += 2000;
+  rd->thresh_mult[THR_ZEROG] += 2000;
+  rd->thresh_mult[THR_ZEROA] += 2000;
+  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
+  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
+
+  rd->thresh_mult[THR_H_PRED] += 2000;
+  rd->thresh_mult[THR_V_PRED] += 2000;
+  rd->thresh_mult[THR_D45_PRED ] += 2500;
+  rd->thresh_mult[THR_D135_PRED] += 2500;
+  rd->thresh_mult[THR_D117_PRED] += 2500;
+  rd->thresh_mult[THR_D153_PRED] += 2500;
+  rd->thresh_mult[THR_D207_PRED] += 2500;
+  rd->thresh_mult[THR_D63_PRED] += 2500;
+
+  /* disable frame modes if flags not set */
+  if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
+    rd->thresh_mult[THR_NEWMV    ] = INT_MAX;
+    rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
+    rd->thresh_mult[THR_ZEROMV   ] = INT_MAX;
+    rd->thresh_mult[THR_NEARMV   ] = INT_MAX;
+  }
+  if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+    rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
+    rd->thresh_mult[THR_ZEROG    ] = INT_MAX;
+    rd->thresh_mult[THR_NEARG    ] = INT_MAX;
+    rd->thresh_mult[THR_NEWG     ] = INT_MAX;
+  }
+  if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
+    rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
+    rd->thresh_mult[THR_ZEROA    ] = INT_MAX;
+    rd->thresh_mult[THR_NEARA    ] = INT_MAX;
+    rd->thresh_mult[THR_NEWA     ] = INT_MAX;
+  }
+
+  if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
+      (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
+    rd->thresh_mult[THR_COMP_ZEROLA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARLA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEWLA    ] = INT_MAX;
+  }
+  if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
+      (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
+    rd->thresh_mult[THR_COMP_ZEROGA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARGA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEWGA    ] = INT_MAX;
+  }
+}
+
+void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  RD_OPT *const rd = &cpi->rd;
+  int i;
+
+  for (i = 0; i < MAX_REFS; ++i)
+    rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode)  ? -500 : 0;
+
+  rd->thresh_mult_sub8x8[THR_LAST] += 2500;
+  rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
+  rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
+  rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
+  rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
+  rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
+
+  // Check for masked out split cases.
+  for (i = 0; i < MAX_REFS; i++)
+    if (sf->disable_split_mask & (1 << i))
+      rd->thresh_mult_sub8x8[i] = INT_MAX;
+
+  // disable mode test if frame flag is not set
+  if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
+    rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
+  if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
+    rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
+  if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
+    rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
+  if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
+      (VP9_LAST_FLAG | VP9_ALT_FLAG))
+    rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
+  if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
+      (VP9_GOLD_FLAG | VP9_ALT_FLAG))
+    rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
+}
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index a01dbd4d3..e48566499 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -83,6 +83,10 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                               ENTROPY_CONTEXT t_above[16],
                               ENTROPY_CONTEXT t_left[16]);
 
+void vp9_set_rd_speed_thresholds(VP9_COMP *cpi);
+
+void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index c98c9d415..a5694161c 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -27,8 +27,6 @@
 #include "vpx_ports/vpx_timer.h"
 #include "vpx_scale/vpx_scale.h"
 
-#define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
-
 static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
                                             uint8_t *y_mb_ptr,
                                             uint8_t *u_mb_ptr,
@@ -122,8 +120,6 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1,
   }
 }
 
-#if ALT_REF_MC_ENABLED
-
 static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
                                               uint8_t *arf_frame_buf,
                                               uint8_t *frame_ptr_buf,
@@ -133,6 +129,8 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
   int step_param;
   int sadpb = x->sadperbit16;
   int bestsme = INT_MAX;
+  int distortion;
+  unsigned int sse;
 
   MV best_ref_mv1 = {0, 0};
   MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
@@ -154,26 +152,19 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
   step_param = cpi->sf.reduce_first_step_size + (cpi->oxcf.speed > 5 ? 1 : 0);
   step_param = MIN(step_param, cpi->sf.max_step_search_steps - 2);
 
-  /*cpi->sf.search_method == HEX*/
   // Ignore mv costing by sending NULL pointer instead of cost arrays
   vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
                  &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv);
 
-  // Try sub-pixel MC?
-  // if (bestsme > error_thresh && bestsme < INT_MAX)
-  {
-    int distortion;
-    unsigned int sse;
-    // Ignore mv costing by sending NULL pointer instead of cost array
-    bestsme = cpi->find_fractional_mv_step(x, ref_mv,
-                                           &best_ref_mv1,
-                                           cpi->common.allow_high_precision_mv,
-                                           x->errorperbit,
-                                           &cpi->fn_ptr[BLOCK_16X16],
-                                           0, cpi->sf.subpel_iters_per_step,
-                                           NULL, NULL,
-                                           &distortion, &sse);
-  }
+  // Ignore mv costing by sending NULL pointer instead of cost array
+  bestsme = cpi->find_fractional_mv_step(x, ref_mv,
+                                         &best_ref_mv1,
+                                         cpi->common.allow_high_precision_mv,
+                                         x->errorperbit,
+                                         &cpi->fn_ptr[BLOCK_16X16],
+                                         0, cpi->sf.subpel_iters_per_step,
+                                         NULL, NULL,
+                                         &distortion, &sse);
 
   // Restore input state
   x->plane[0].src = src;
@@ -181,7 +172,6 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
 
   return bestsme;
 }
-#endif
 
 static void temporal_filter_iterate_c(VP9_COMP *cpi,
                                       int frame_count,
@@ -215,8 +205,7 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
     input_buffer[i] = mbd->plane[i].pre[0].buf;
 
   for (mb_row = 0; mb_row < mb_rows; mb_row++) {
-#if ALT_REF_MC_ENABLED
-    // Source frames are extended to 16 pixels.  This is different than
+    // Source frames are extended to 16 pixels. This is different than
     //  L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
     // A 6/8 tap filter is used for motion search.  This requires 2 pixels
     //  before and 3 pixels after.  So the largest Y mv on a border would
@@ -230,7 +219,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
     cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
     cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
                          + (17 - 2 * VP9_INTERP_EXTEND);
-#endif
 
     for (mb_col = 0; mb_col < mb_cols; mb_col++) {
       int i, j, k;
@@ -239,13 +227,14 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
       vpx_memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
       vpx_memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
 
-#if ALT_REF_MC_ENABLED
       cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
       cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
                            + (17 - 2 * VP9_INTERP_EXTEND);
-#endif
 
       for (frame = 0; frame < frame_count; frame++) {
+        const int thresh_low  = 10000;
+        const int thresh_high = 20000;
+
         if (cpi->frames[frame] == NULL)
           continue;
 
@@ -255,38 +244,31 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
         if (frame == alt_ref_index) {
           filter_weight = 2;
         } else {
-          int err = 0;
-#if ALT_REF_MC_ENABLED
-#define THRESH_LOW   10000
-#define THRESH_HIGH  20000
-
           // Find best match in this frame by MC
-          err = temporal_filter_find_matching_mb_c
-                (cpi,
-                 cpi->frames[alt_ref_index]->y_buffer + mb_y_offset,
-                 cpi->frames[frame]->y_buffer + mb_y_offset,
-                 cpi->frames[frame]->y_stride);
-#endif
+          int err = temporal_filter_find_matching_mb_c(cpi,
+              cpi->frames[alt_ref_index]->y_buffer + mb_y_offset,
+              cpi->frames[frame]->y_buffer + mb_y_offset,
+              cpi->frames[frame]->y_stride);
+
           // Assign higher weight to matching MB if it's error
           // score is lower. If not applying MC default behavior
           // is to weight all MBs equal.
-          filter_weight = err < THRESH_LOW
-                          ? 2 : err < THRESH_HIGH ? 1 : 0;
+          filter_weight = err < thresh_low
+                          ? 2 : err < thresh_high ? 1 : 0;
         }
 
         if (filter_weight != 0) {
           // Construct the predictors
-          temporal_filter_predictors_mb_c
-          (mbd,
-           cpi->frames[frame]->y_buffer + mb_y_offset,
-           cpi->frames[frame]->u_buffer + mb_uv_offset,
-           cpi->frames[frame]->v_buffer + mb_uv_offset,
-           cpi->frames[frame]->y_stride,
-           mb_uv_height,
-           mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
-           mbd->mi[0]->bmi[0].as_mv[0].as_mv.col,
-           predictor, scale,
-           mb_col * 16, mb_row * 16);
+          temporal_filter_predictors_mb_c(mbd,
+              cpi->frames[frame]->y_buffer + mb_y_offset,
+              cpi->frames[frame]->u_buffer + mb_uv_offset,
+              cpi->frames[frame]->v_buffer + mb_uv_offset,
+              cpi->frames[frame]->y_stride,
+              mb_uv_height,
+              mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
+              mbd->mi[0]->bmi[0].as_mv[0].as_mv.col,
+              predictor, scale,
+              mb_col * 16, mb_row * 16);
 
           // Apply the filter (YUV)
           vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
@@ -320,7 +302,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
           // move to next pixel
           byte++;
         }
-
         byte += stride - 16;
       }
 
@@ -347,14 +328,11 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
           // move to next pixel
           byte++;
         }
-
         byte += stride - mb_uv_height;
       }
-
       mb_y_offset += 16;
       mb_uv_offset += mb_uv_height;
     }
-
     mb_y_offset += 16 * (f->y_stride - mb_cols);
     mb_uv_offset += mb_uv_height * (f->uv_stride - mb_cols);
   }
@@ -366,18 +344,14 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
 
 void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
   VP9_COMMON *const cm = &cpi->common;
-
   int frame = 0;
-
   int frames_to_blur_backward = 0;
   int frames_to_blur_forward = 0;
   int frames_to_blur = 0;
   int start_frame = 0;
-
   int strength = cpi->active_arnr_strength;
   int blur_type = cpi->oxcf.arnr_type;
   int max_frames = cpi->active_arnr_frames;
-
   const int num_frames_backward = distance;
   const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
                                - (num_frames_backward + 1);
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 71867a938..a31a476f3 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -18,63 +18,34 @@
 
 #include "vp9/encoder/vp9_variance.h"
 
-void variance(const uint8_t *src_ptr,
-              int  source_stride,
-              const uint8_t *ref_ptr,
-              int  recon_stride,
-              int  w,
-              int  h,
-              unsigned int *sse,
-              int *sum) {
+void variance(const uint8_t *a, int  a_stride,
+              const uint8_t *b, int  b_stride,
+              int  w, int  h, unsigned int *sse, int *sum) {
   int i, j;
-  int diff;
 
   *sum = 0;
   *sse = 0;
 
   for (i = 0; i < h; i++) {
     for (j = 0; j < w; j++) {
-      diff = src_ptr[j] - ref_ptr[j];
+      const int diff = a[j] - b[j];
       *sum += diff;
       *sse += diff * diff;
     }
 
-    src_ptr += source_stride;
-    ref_ptr += recon_stride;
+    a += a_stride;
+    b += b_stride;
   }
 }
 
-/****************************************************************************
- *
- *  ROUTINE       : filter_block2d_bil_first_pass
- *
- *  INPUTS        : uint8_t  *src_ptr          : Pointer to source block.
- *                  uint32_t src_pixels_per_line : Stride of input block.
- *                  uint32_t pixel_step        : Offset between filter input
- *                                               samples (see notes).
- *                  uint32_t output_height     : Input block height.
- *                  uint32_t output_width      : Input block width.
- *                  int32_t  *vp9_filter       : Array of 2 bi-linear filter
- *                                               taps.
- *
- *  OUTPUTS       : int32_t *output_ptr        : Pointer to filtered block.
- *
- *  RETURNS       : void
- *
- *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
- *                  either horizontal or vertical direction to produce the
- *                  filtered output block. Used to implement first-pass
- *                  of 2-D separable filter.
- *
- *  SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
- *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
- *                  pixel_step defines whether the filter is applied
- *                  horizontally (pixel_step=1) or vertically (pixel_step=
- *                  stride).
- *                  It defines the offset required to move from one input
- *                  to the next.
- *
- ****************************************************************************/
+// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// first-pass of 2-D separable filter.
+//
+// Produces int32_t output to retain precision for next pass. Two filter taps
+// should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is
+// applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It
+// defines the offset required to move from one input to the next.
 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
                                               uint16_t *output_ptr,
                                               unsigned int src_pixels_per_line,
@@ -99,38 +70,14 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
   }
 }
 
-/****************************************************************************
- *
- *  ROUTINE       : filter_block2d_bil_second_pass
- *
- *  INPUTS        : int32_t  *src_ptr          : Pointer to source block.
- *                  uint32_t src_pixels_per_line : Stride of input block.
- *                  uint32_t pixel_step        : Offset between filter input
- *                                               samples (see notes).
- *                  uint32_t output_height     : Input block height.
- *                  uint32_t output_width      : Input block width.
- *                  int32_t  *vp9_filter       : Array of 2 bi-linear filter
- *                                               taps.
- *
- *  OUTPUTS       : uint16_t *output_ptr       : Pointer to filtered block.
- *
- *  RETURNS       : void
- *
- *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
- *                  either horizontal or vertical direction to produce the
- *                  filtered output block. Used to implement second-pass
- *                  of 2-D separable filter.
- *
- *  SPECIAL NOTES : Requires 32-bit input as produced by
- *                  filter_block2d_bil_first_pass.
- *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
- *                  pixel_step defines whether the filter is applied
- *                  horizontally (pixel_step=1) or vertically (pixel_step=
- *                  stride).
- *                  It defines the offset required to move from one input
- *                  to the next.
- *
- ****************************************************************************/
+// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// second-pass of 2-D separable filter.
+//
+// Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two
+// filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the
+// filter is applied horizontally (pixel_step=1) or vertically (pixel_step=
+// stride). It defines the offset required to move from one input to the next.
 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
                                                uint8_t *output_ptr,
                                                unsigned int src_pixels_per_line,
@@ -156,9 +103,8 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
   unsigned int i, sum = 0;
 
-  for (i = 0; i < 256; i++) {
-    sum += (src_ptr[i] * src_ptr[i]);
-  }
+  for (i = 0; i < 256; i++)
+    sum += src_ptr[i] * src_ptr[i];
 
   return sum;
 }
@@ -183,12 +129,10 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                            const uint8_t *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
+  uint16_t fdata3[65 * 64];
   uint8_t temp2[68 * 64];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 64, hfilter);
@@ -205,13 +149,11 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse,
                                                const uint8_t *second_pred) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
+  uint16_t fdata3[65 * 64];
   uint8_t temp2[68 * 64];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 64, hfilter);
@@ -240,12 +182,10 @@ unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                            const uint8_t *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
+  uint16_t fdata3[65 * 64];
   uint8_t temp2[68 * 64];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 32, hfilter);
@@ -262,13 +202,11 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse,
                                                const uint8_t *second_pred) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
+  uint16_t fdata3[65 * 64];
   uint8_t temp2[68 * 64];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 32, hfilter);
@@ -297,12 +235,10 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                            const uint8_t *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
+  uint16_t fdata3[33 * 32];
   uint8_t temp2[36 * 32];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 32, hfilter);
@@ -319,13 +255,11 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse,
                                                const uint8_t *second_pred) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
+  uint16_t fdata3[33 * 32];
   uint8_t temp2[36 * 32];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 32, hfilter);
@@ -354,12 +288,10 @@ unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                            const uint8_t *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
+  uint16_t fdata3[33 * 32];
   uint8_t temp2[36 * 32];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 16, hfilter);
@@ -376,13 +308,11 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse,
                                                const uint8_t *second_pred) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
+  uint16_t fdata3[33 * 32];
   uint8_t temp2[36 * 32];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 16, hfilter);
@@ -582,17 +512,12 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
   uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  uint16_t fdata3[5 * 4];
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
-  // First filter 1d Horizontal
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 5, 4, hfilter);
-
-  // Now filter Verticaly
   var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
 
   return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
@@ -607,18 +532,13 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
                                              unsigned int *sse,
                                              const uint8_t *second_pred) {
   uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
-  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);
+  uint16_t fdata3[5 * 4];
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
-  // First filter 1d Horizontal
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 5, 4, hfilter);
-
-  // Now filter Verticaly
   var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
   vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
   return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
@@ -631,17 +551,14 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
-  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
+  uint16_t fdata3[9 * 8];
   uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 8, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
-
   return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -653,18 +570,17 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse,
                                              const uint8_t *second_pred) {
-  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
+  uint16_t fdata3[9 * 8];
   uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 8, hfilter);
   var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
   vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
+
   return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
 }
 
@@ -675,12 +591,10 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                            const uint8_t *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse) {
-  uint16_t fdata3[17 * 16];  // Temp data buffer used in filtering
+  uint16_t fdata3[17 * 16];
   uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 16, hfilter);
@@ -699,11 +613,9 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
                                                const uint8_t *second_pred) {
   uint16_t fdata3[17 * 16];
   uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 16, hfilter);
@@ -722,10 +634,8 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                            unsigned int *sse) {
   uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
   uint8_t temp2[68 * 64];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 64, hfilter);
@@ -742,13 +652,11 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse,
                                                const uint8_t *second_pred) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
+  uint16_t fdata3[65 * 64];
   uint8_t temp2[68 * 64];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 65, 64, hfilter);
@@ -764,12 +672,10 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                            const uint8_t *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
+  uint16_t fdata3[33 * 32];
   uint8_t temp2[36 * 32];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 32, hfilter);
@@ -786,13 +692,11 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse,
                                                const uint8_t *second_pred) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
+  uint16_t fdata3[33 * 32];
   uint8_t temp2[36 * 32];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 33, 32, hfilter);
@@ -928,12 +832,10 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
-  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
+  uint16_t fdata3[16 * 9];
   uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 16, hfilter);
@@ -950,13 +852,11 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
-  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
+  uint16_t fdata3[16 * 9];
   uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 16, hfilter);
@@ -974,10 +874,8 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                           unsigned int *sse) {
   uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
   uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 8, hfilter);
@@ -994,13 +892,11 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
                                               int dst_pixels_per_line,
                                               unsigned int *sse,
                                               const uint8_t *second_pred) {
-  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
+  uint16_t fdata3[9 * 16];
   uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 17, 8, hfilter);
@@ -1016,12 +912,10 @@ unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
-  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
+  uint16_t fdata3[8 * 5];
   uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 5, 8, hfilter);
@@ -1038,13 +932,11 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse,
                                              const uint8_t *second_pred) {
-  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
+  uint16_t fdata3[8 * 5];
   uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 5, 8, hfilter);
@@ -1060,14 +952,12 @@ unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
-  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
+  uint16_t fdata3[5 * 8];
   // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be
   // of this big? same issue appears in all other block size settings.
   uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 4, hfilter);
@@ -1084,13 +974,11 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse,
                                              const uint8_t *second_pred) {
-  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
+  uint16_t fdata3[5 * 8];
   uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);
+  const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+  const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
 
   var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
                                     1, 9, 4, hfilter);
@@ -1106,9 +994,8 @@ void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
 
   for (i = 0; i < height; i++) {
     for (j = 0; j < width; j++) {
-      int tmp;
-      tmp = pred[j] + ref[j];
-      comp_pred[j] = (tmp + 1) >> 1;
+      const int tmp = pred[j] + ref[j];
+      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
     }
     comp_pred += width;
     pred += width;
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index c9e39a1a2..4c8be71cd 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -17,14 +17,10 @@
 extern "C" {
 #endif
 
-void variance(const uint8_t *src_ptr,
-              int  source_stride,
-              const uint8_t *ref_ptr,
-              int  recon_stride,
-              int  w,
-              int  h,
-              unsigned int *sse,
-              int *sum);
+void variance(const uint8_t *a, int a_stride,
+              const uint8_t *b, int b_stride,
+              int  w, int  h,
+              unsigned int *sse, int *sum);
 
 typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr,
                                     int source_stride,