31 files changed, 339 insertions, 1172 deletions
diff --git a/vp9/encoder/vp9_aq_variance.h b/vp9/encoder/vp9_aq_variance.h
index 381fe50cf..d1a459fe9 100644
--- a/vp9/encoder/vp9_aq_variance.h
+++ b/vp9/encoder/vp9_aq_variance.h
@@ -12,7 +12,7 @@
 #ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_
 #define VP9_ENCODER_VP9_AQ_VARIANCE_H_
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 8d2afb991..c5a85c9df 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -47,12 +47,12 @@ void vp9_entropy_mode_init() {
   vp9_tokens_from_tree(inter_mode_encodings, vp9_inter_mode_tree);
 }
 
-static void write_intra_mode(vp9_writer *w, MB_PREDICTION_MODE mode,
+static void write_intra_mode(vp9_writer *w, PREDICTION_MODE mode,
                              const vp9_prob *probs) {
   vp9_write_token(w, vp9_intra_mode_tree, probs, &intra_mode_encodings[mode]);
 }
 
-static void write_inter_mode(vp9_writer *w, MB_PREDICTION_MODE mode,
+static void write_inter_mode(vp9_writer *w, PREDICTION_MODE mode,
                              const vp9_prob *probs) {
   assert(is_inter_mode(mode));
   vp9_write_token(w, vp9_inter_mode_tree, probs,
@@ -233,7 +233,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct segmentation *const seg = &cm->seg;
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
-  const MB_PREDICTION_MODE mode = mbmi->mode;
+  const PREDICTION_MODE mode = mbmi->mode;
   const int segment_id = mbmi->segment_id;
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const int allow_hp = cm->allow_high_precision_mv;
@@ -273,7 +273,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
       const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
       for (idy = 0; idy < 2; idy += num_4x4_h) {
         for (idx = 0; idx < 2; idx += num_4x4_w) {
-          const MB_PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode;
+          const PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode;
           write_intra_mode(w, b_mode, cm->fc.y_mode_prob[0]);
         }
       }
@@ -308,7 +308,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
       for (idy = 0; idy < 2; idy += num_4x4_h) {
         for (idx = 0; idx < 2; idx += num_4x4_w) {
           const int j = idy * 2 + idx;
-          const MB_PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
+          const PREDICTION_MODE b_mode = mi->bmi[j].as_mode;
           write_inter_mode(w, b_mode, inter_probs);
           ++cm->counts.inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
           if (b_mode == NEWMV) {
@@ -903,7 +903,7 @@ static int get_refresh_mask(VP9_COMP *cpi) {
       // other uses are implemented (like RTC/temporal scaling)
       //
       // gld_fb_idx and alt_fb_idx need to be swapped for future frames, but
-      // that happens in vp9_onyx_if.c:update_reference_frames() so that it can
+      // that happens in vp9_encoder.c:update_reference_frames() so that it can
       // be done outside of the recode loop.
       return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
              (cpi->refresh_golden_frame << cpi->alt_fb_idx);
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
index 40d7394ed..66a6f00e3 100644
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -11,7 +11,7 @@
 #ifndef VP9_ENCODER_VP9_CONTEXT_TREE_H_
 #define VP9_ENCODER_VP9_CONTEXT_TREE_H_
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 
 void vp9_setup_pc_tree(VP9_COMMON *cm, MACROBLOCK *x);
 void vp9_free_pc_tree(MACROBLOCK *x);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 9956acc0b..2e44f7df3 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -2412,7 +2412,7 @@ typedef enum {
 } motion_vector_context;
 
 static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
-                          MB_PREDICTION_MODE mode) {
+                          PREDICTION_MODE mode) {
   mbmi->mode = mode;
   mbmi->uv_mode = mode;
   mbmi->mv[0].as_int = 0;
@@ -2444,8 +2444,7 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
     vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col,
                         rate, dist, bsize);
   } else {
-    MB_PREDICTION_MODE intramode = DC_PRED;
-    set_mode_info(&xd->mi[0]->mbmi, bsize, intramode);
+    set_mode_info(&xd->mi[0]->mbmi, bsize, DC_PRED);
   }
   duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
 }
@@ -3209,8 +3208,8 @@ void vp9_encode_frame(VP9_COMP *cpi) {
 }
 
 static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
-  const MB_PREDICTION_MODE y_mode = mi->mbmi.mode;
-  const MB_PREDICTION_MODE uv_mode = mi->mbmi.uv_mode;
+  const PREDICTION_MODE y_mode = mi->mbmi.mode;
+  const PREDICTION_MODE uv_mode = mi->mbmi.uv_mode;
   const BLOCK_SIZE bsize = mi->mbmi.sb_type;
 
   if (bsize < BLOCK_8X8) {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index b0c014eef..baa466554 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -473,7 +473,7 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
   int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
   const scan_order *scan_order;
   TX_TYPE tx_type;
-  MB_PREDICTION_MODE mode;
+  PREDICTION_MODE mode;
   const int bwl = b_width_log2(plane_bsize);
   const int diff_stride = 4 * (1 << bwl);
   uint8_t *src, *dst;
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index dcf6e8759..edef1e224 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -13,7 +13,7 @@
 
 #include "./vpx_config.h"
 #include "vp9/encoder/vp9_block.h"
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/common/vp9_onyxc_int.h"
 
 #ifdef __cplusplus
diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h
index 50cb9611b..e67f9e3b0 100644
--- a/vp9/encoder/vp9_encodemv.h
+++ b/vp9/encoder/vp9_encodemv.h
@@ -12,7 +12,7 @@
 #ifndef VP9_ENCODER_VP9_ENCODEMV_H_
 #define VP9_ENCODER_VP9_ENCODEMV_H_
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_encoder.c
index f8dd5c5b7..6816f555e 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -36,7 +36,7 @@
 #include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/encoder/vp9_firstpass.h"
 #include "vp9/encoder/vp9_mbgraph.h"
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_picklpf.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rdopt.h"
@@ -131,6 +131,24 @@ static void setup_inter_frame(VP9_COMMON *cm) {
   cm->fc = cm->frame_contexts[cm->frame_context_idx];
 }
 
+static void setup_frame(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  // Set up entropy context depending on frame type. The decoder mandates
+  // the use of the default context, index 0, for keyframes and inter
+  // frames where the error_resilient_mode or intra_only flag is set. For
+  // other inter-frames the encoder currently uses only two contexts;
+  // context 1 for ALTREF frames and context 0 for the others.
+  if (cm->frame_type == KEY_FRAME) {
+    setup_key_frame(cpi);
+  } else {
+    if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc)
+        cm->frame_context_idx = cpi->refresh_alt_ref_frame;
+     setup_inter_frame(cm);
+  }
+}
+
+
+
 void vp9_initialize_enc() {
   static int init_done = 0;
 
@@ -537,7 +555,7 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
 
   if ((cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ||
+      cpi->oxcf.rc_mode == RC_MODE_CBR) ||
       (cpi->svc.number_spatial_layers > 1 &&
       cpi->oxcf.mode == TWO_PASS_SECOND_BEST)) {
     vp9_init_layer_context(cpi);
@@ -624,7 +642,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
   cpi->encode_breakout = cpi->oxcf.encode_breakout;
 
   // local file playback mode == really big buffer
-  if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) {
+  if (cpi->oxcf.rc_mode == RC_MODE_VBR) {
     cpi->oxcf.starting_buffer_level   = 60000;
     cpi->oxcf.optimal_buffer_level    = 60000;
     cpi->oxcf.maximum_buffer_size     = 240000;
@@ -668,8 +686,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
   cm->display_width = cpi->oxcf.width;
   cm->display_height = cpi->oxcf.height;
 
-  cm->lf.sharpness_level = cpi->oxcf.sharpness;
-
   if (cpi->initial_width) {
     // Increasing the size of the frame beyond the first seen frame, or some
     // otherwise signaled maximum size, is not supported.
@@ -680,7 +696,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
   update_frame_size(cpi);
 
   if ((cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ||
+      cpi->oxcf.rc_mode == RC_MODE_CBR) ||
       (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) {
     vp9_update_layer_context_change_config(cpi,
                                            (int)cpi->oxcf.target_bandwidth);
@@ -813,7 +829,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
                                sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
   }
 
-  cpi->key_frame_frequency = cpi->oxcf.key_freq;
   cpi->refresh_alt_ref_frame = 0;
 
 #if CONFIG_MULTIPLE_ARF
@@ -1584,7 +1599,7 @@ static int recode_loop_test(const VP9_COMP *cpi,
     if ((rc->projected_frame_size > high_limit && q < maxq) ||
         (rc->projected_frame_size < low_limit && q > minq)) {
       force_recode = 1;
-    } else if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
+    } else if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY) {
       // Deal with frame undershoot and whether or not we are
       // below the automatically set cq level.
       if (q > oxcf->cq_level &&
@@ -1800,20 +1815,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
   VP9_COMMON *const cm = &cpi->common;
   vp9_clear_system_state();
   vp9_set_quantizer(cm, q);
-
-  // Set up entropy context depending on frame type. The decoder mandates
-  // the use of the default context, index 0, for keyframes and inter
-  // frames where the error_resilient_mode or intra_only flag is set. For
-  // other inter-frames the encoder currently uses only two contexts;
-  // context 1 for ALTREF frames and context 0 for the others.
-  if (cm->frame_type == KEY_FRAME) {
-    setup_key_frame(cpi);
-  } else {
-    if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc)
-      cm->frame_context_idx = cpi->refresh_alt_ref_frame;
-
-    setup_inter_frame(cm);
-  }
+  setup_frame(cpi);
   // Variance adaptive and in frame q adjustment experiments are mutually
   // exclusive.
   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
@@ -1858,21 +1860,8 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
 
     vp9_set_quantizer(cm, q);
 
-    if (loop_count == 0) {
-      // Set up entropy context depending on frame type. The decoder mandates
-      // the use of the default context, index 0, for keyframes and inter
-      // frames where the error_resilient_mode or intra_only flag is set. For
-      // other inter-frames the encoder currently uses only two contexts;
-      // context 1 for ALTREF frames and context 0 for the others.
-      if (cm->frame_type == KEY_FRAME) {
-        setup_key_frame(cpi);
-      } else {
-        if (!cm->intra_only && !cm->error_resilient_mode && !cpi->use_svc)
-          cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
-
-        setup_inter_frame(cm);
-      }
-    }
+    if (loop_count == 0)
+      setup_frame(cpi);
 
     // Variance adaptive and in frame q adjustment experiments are mutually
     // exclusive.
@@ -1907,7 +1896,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
         frame_over_shoot_limit = 1;
     }
 
-    if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
+    if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) {
       loop = 0;
     } else {
       if ((cm->frame_type == KEY_FRAME) &&
@@ -2005,7 +1994,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
             // This should only trigger where there is very substantial
             // undershoot on a frame and the auto cq level is above
             // the user passsed in value.
-            if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
+            if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY &&
                 q < q_low) {
               q_low = q;
             }
@@ -2093,8 +2082,9 @@ static void set_ext_overrides(VP9_COMP *cpi) {
   }
 }
 
-static YV12_BUFFER_CONFIG *scale_if_required(VP9_COMMON *cm,
-  YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled) {
+YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
+                                          YV12_BUFFER_CONFIG *unscaled,
+                                          YV12_BUFFER_CONFIG *scaled) {
   if (cm->mi_cols * MI_SIZE != unscaled->y_width ||
       cm->mi_rows * MI_SIZE != unscaled->y_height) {
     scale_and_extend_frame_nonnormative(unscaled, scaled);
@@ -2119,12 +2109,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   struct segmentation *const seg = &cm->seg;
   set_ext_overrides(cpi);
 
-  cpi->Source = scale_if_required(cm, cpi->un_scaled_source,
-                                  &cpi->scaled_source);
+  cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
+                                      &cpi->scaled_source);
 
   if (cpi->unscaled_last_source != NULL)
-    cpi->Last_Source = scale_if_required(cm, cpi->unscaled_last_source,
-                                         &cpi->scaled_last_source);
+    cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
+                                             &cpi->scaled_last_source);
 
   vp9_scale_references(cpi);
 
@@ -2163,7 +2153,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
 
   // Set various flags etc to special state if it is a key frame.
   if (frame_is_intra_only(cm)) {
-    setup_key_frame(cpi);
     // Reset the loop filter deltas and segmentation map.
     vp9_reset_segment_features(&cm->seg);
 
@@ -2203,7 +2192,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   // For 1 pass CBR, check if we are dropping this frame.
   // Never drop on key frame.
   if (cpi->pass == 0 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER &&
+      cpi->oxcf.rc_mode == RC_MODE_CBR &&
       cm->frame_type != KEY_FRAME) {
     if (vp9_rc_drop_frame(cpi)) {
       vp9_rc_postencode_update_drop_frame(cpi);
@@ -2400,7 +2389,7 @@ static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
 
 static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
                         unsigned int *frame_flags) {
-  if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+  if (cpi->oxcf.rc_mode == RC_MODE_CBR) {
     vp9_rc_get_one_pass_cbr_params(cpi);
   } else {
     vp9_rc_get_one_pass_vbr_params(cpi);
@@ -2691,7 +2680,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   }
 
   if (cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+      cpi->oxcf.rc_mode == RC_MODE_CBR) {
     vp9_update_temporal_layer_framerate(cpi);
     vp9_restore_layer_context(cpi);
   }
@@ -2724,7 +2713,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   if (cpi->pass == 2 &&
       cm->current_video_frame == 0 &&
       cpi->oxcf.allow_spatial_resampling &&
-      cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) {
+      cpi->oxcf.rc_mode == RC_MODE_VBR) {
     // Internal scaling is triggered on the first frame.
     vp9_set_size_literal(cpi, cpi->oxcf.scaled_frame_width,
                          cpi->oxcf.scaled_frame_height);
@@ -2783,7 +2772,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
   // Save layer specific state.
   if ((cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ||
+      cpi->oxcf.rc_mode == RC_MODE_CBR) ||
       (cpi->svc.number_spatial_layers > 1 && cpi->pass == 2)) {
     vp9_save_layer_context(cpi);
   }
@@ -2905,9 +2894,10 @@ int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
                    unsigned int threshold[MAX_SEGMENTS]) {
   signed char feature_data[SEG_LVL_MAX][MAX_SEGMENTS];
   struct segmentation *seg = &cpi->common.seg;
+  const VP9_COMMON *const cm = &cpi->common;
   int i;
 
-  if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
+  if (cm->mb_rows != rows || cm->mb_cols != cols)
     return -1;
 
   if (!map) {
@@ -2915,8 +2905,7 @@ int vp9_set_roimap(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
     return 0;
   }
 
-  // Set the segmentation Map
-  vp9_set_segmentation_map(cpi, map);
+  vpx_memcpy(cpi->segmentation_map, map, cm->mi_rows * cm->mi_cols);
 
   // Activate segmentation.
   vp9_enable_segmentation(seg);
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_encoder.h
index 5f4112369..8f3249407 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef VP9_ENCODER_VP9_ONYX_INT_H_
-#define VP9_ENCODER_VP9_ONYX_INT_H_
+#ifndef VP9_ENCODER_VP9_ENCODER_H_
+#define VP9_ENCODER_VP9_ENCODER_H_
 
 #include <stdio.h>
 
@@ -132,11 +132,11 @@ typedef enum {
 } VPX_SCALING;
 
 typedef enum {
-  USAGE_LOCAL_FILE_PLAYBACK = 0,
-  USAGE_STREAM_FROM_SERVER  = 1,
-  USAGE_CONSTRAINED_QUALITY = 2,
-  USAGE_CONSTANT_QUALITY    = 3,
-} END_USAGE;
+  RC_MODE_VBR = 0,
+  RC_MODE_CBR = 1,
+  RC_MODE_CONSTRAINED_QUALITY = 2,
+  RC_MODE_CONSTANT_QUALITY    = 3,
+} RC_MODE;
 
 typedef enum {
   // Good Quality Fast Encoding. The encoder balances quality with the
@@ -210,7 +210,7 @@ typedef struct VP9EncoderConfig {
   // ----------------------------------------------------------------
   // DATARATE CONTROL OPTIONS
 
-  END_USAGE end_usage;  // vbr or cbr
+  RC_MODE rc_mode;  // vbr, cbr, constrained quality or constant quality
 
   // buffer targeting aggressiveness
   int under_shoot_pct;
@@ -276,7 +276,6 @@ typedef struct VP9EncoderConfig {
 
   int arnr_max_frames;
   int arnr_strength;
-  int arnr_type;
 
   int tile_columns;
   int tile_rows;
@@ -338,8 +337,6 @@ typedef struct VP9_COMP {
   YV12_BUFFER_CONFIG *unscaled_last_source;
   YV12_BUFFER_CONFIG scaled_last_source;
 
-  int key_frame_frequency;
-
   int gold_is_last;  // gold same as last frame ( short circuit gold searches)
   int alt_is_last;  // Alt same as last ( short circuit altref search)
   int gold_is_alt;  // don't do both alt and gold search ( just do gold).
@@ -618,6 +615,10 @@ void vp9_update_reference_frames(VP9_COMP *cpi);
 
 int64_t vp9_rescale(int64_t val, int64_t num, int denom);
 
+YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
+                                          YV12_BUFFER_CONFIG *unscaled,
+                                          YV12_BUFFER_CONFIG *scaled);
+
 static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
                                 MV_REFERENCE_FRAME ref0,
                                 MV_REFERENCE_FRAME ref1) {
@@ -631,4 +632,4 @@ static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
 }  // extern "C"
 #endif
 
-#endif  // VP9_ENCODER_VP9_ONYX_INT_H_
+#endif  // VP9_ENCODER_VP9_ENCODER_H_
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 1dacfaaae..3d4b96259 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -28,10 +28,10 @@
 #include "vp9/encoder/vp9_encodeframe.h"
 #include "vp9/encoder/vp9_encodemb.h"
 #include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_extend.h"
 #include "vp9/encoder/vp9_firstpass.h"
 #include "vp9/encoder/vp9_mcomp.h"
-#include "vp9/encoder/vp9_onyx_int.h"
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rdopt.h"
@@ -533,6 +533,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
     // Disable golden frame for svc first pass for now.
     gld_yv12 = NULL;
     set_ref_ptrs(cm, xd, ref_frame, NONE);
+
+    cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
+                                        &cpi->scaled_source);
   }
 
   vp9_setup_src_planes(x, cpi->Source, 0, 0);
@@ -849,6 +852,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
     ++twopass->sr_update_lag;
   }
 
+  vp9_extend_frame_borders(new_yv12);
+
   if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) {
     vp9_update_reference_frames(cpi);
   } else {
@@ -856,8 +861,6 @@ void vp9_first_pass(VP9_COMP *cpi) {
     swap_yv12(lst_yv12, new_yv12);
   }
 
-  vp9_extend_frame_borders(lst_yv12);
-
   // Special case for the first frame. Copy into the GF buffer as a second
   // reference.
   if (cm->current_video_frame == 0 && gld_yv12 != NULL) {
@@ -930,7 +933,7 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi,
     }
 
     // Restriction on active max q for constrained quality mode.
-    if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY)
+    if (cpi->oxcf.rc_mode == RC_MODE_CONSTRAINED_QUALITY)
       q = MAX(q, oxcf->cq_level);
     return q;
   }
@@ -1932,7 +1935,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
   // Find the next keyframe.
   i = 0;
-  while (twopass->stats_in < twopass->stats_in_end) {
+  while (twopass->stats_in < twopass->stats_in_end &&
+         rc->frames_to_key < cpi->oxcf.key_freq) {
     // Accumulate kf group error.
     kf_group_err += calculate_modified_err(cpi, this_frame);
 
@@ -1962,7 +1966,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
 
       // Special check for transition or high motion followed by a
       // static scene.
-      if (detect_transition_to_still(twopass, i, cpi->key_frame_frequency - i,
+      if (detect_transition_to_still(twopass, i, cpi->oxcf.key_freq - i,
                                      loop_decay_rate, decay_accumulator))
         break;
 
@@ -1970,8 +1974,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
       ++rc->frames_to_key;
 
       // If we don't have a real key frame within the next two
-      // key_frame_frequency intervals then break out of the loop.
-      if (rc->frames_to_key >= 2 * (int)cpi->key_frame_frequency)
+      // key_freq intervals then break out of the loop.
+      if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq)
         break;
     } else {
       ++rc->frames_to_key;
@@ -1984,7 +1988,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   // This code centers the extra kf if the actual natural interval
   // is between 1x and 2x.
   if (cpi->oxcf.auto_key &&
-      rc->frames_to_key > (int)cpi->key_frame_frequency) {
+      rc->frames_to_key > cpi->oxcf.key_freq) {
     FIRSTPASS_STATS tmp_frame = first_frame;
 
     rc->frames_to_key /= 2;
@@ -2000,7 +2004,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
       input_stats(twopass, &tmp_frame);
     }
     rc->next_key_frame_forced = 1;
-  } else if (twopass->stats_in == twopass->stats_in_end) {
+  } else if (twopass->stats_in == twopass->stats_in_end ||
+             rc->frames_to_key >= cpi->oxcf.key_freq) {
     rc->next_key_frame_forced = 1;
   } else {
     rc->next_key_frame_forced = 0;
@@ -2244,7 +2249,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     cm->frame_type = INTER_FRAME;
 #ifdef LONG_TERM_VBR_CORRECTION
     // Correction to rate target based on prior over or under shoot.
-    if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
+    if (cpi->oxcf.rc_mode == RC_MODE_VBR)
       vbr_rate_correction(&modified_target, rc->vbr_bits_off_target);
 #endif
     vp9_rc_set_frame_target(cpi, modified_target);
@@ -2258,7 +2263,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
   }
 
-  if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
+  if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) {
     twopass->active_worst_quality = cpi->oxcf.cq_level;
   } else if (cm->current_video_frame == 0 ||
              (is_spatial_svc && lc->current_video_frame_in_layer == 0)) {
@@ -2346,7 +2351,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   rc->base_frame_target = target;
 #ifdef LONG_TERM_VBR_CORRECTION
   // Correction to rate target based on prior over or under shoot.
-  if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
+  if (cpi->oxcf.rc_mode == RC_MODE_VBR)
     vbr_rate_correction(&target, rc->vbr_bits_off_target);
 #endif
   vp9_rc_set_frame_target(cpi, target);
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index cf03e0142..a298f1c20 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -14,9 +14,9 @@
 
 #include "vp9/common/vp9_common.h"
 
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_extend.h"
 #include "vp9/encoder/vp9_lookahead.h"
-#include "vp9/encoder/vp9_onyx_int.h"
 
 struct lookahead_ctx {
   unsigned int max_sz;         /* Absolute size of the queue */
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 2f63a13a0..e7dcc7a18 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -131,11 +131,10 @@ static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) {
 
   return err;
 }
-static int find_best_16x16_intra(VP9_COMP *cpi,
-                                 MB_PREDICTION_MODE *pbest_mode) {
+static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) {
   MACROBLOCK   *const x  = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
-  MB_PREDICTION_MODE best_mode = -1, mode;
+  PREDICTION_MODE best_mode = -1, mode;
   unsigned int best_err = INT_MAX;
 
   // calculate SATD for each intra prediction mode;
diff --git a/vp9/encoder/vp9_mbgraph.h b/vp9/encoder/vp9_mbgraph.h
index bc2a7048f..c3af972bc 100644
--- a/vp9/encoder/vp9_mbgraph.h
+++ b/vp9/encoder/vp9_mbgraph.h
@@ -20,7 +20,7 @@ typedef struct {
     int err;
     union {
       int_mv mv;
-      MB_PREDICTION_MODE mode;
+      PREDICTION_MODE mode;
     } m;
   } ref[MAX_REF_FRAMES];
 } MBGRAPH_MB_STATS;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 8a7901172..89937f5a6 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -18,7 +18,7 @@
 
 #include "vp9/common/vp9_common.h"
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_mcomp.h"
 
 // #define NEW_DIAMOND_SEARCH
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index e003a0f42..7c42bb84f 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -19,7 +19,7 @@
 #include "vp9/common/vp9_onyxc_int.h"
 #include "vp9/common/vp9_quant_common.h"
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_picklpf.h"
 #include "vp9/encoder/vp9_quantize.h"
 
diff --git a/vp9/encoder/vp9_picklpf.h b/vp9/encoder/vp9_picklpf.h
index 7d08ddb5f..33c490f69 100644
--- a/vp9/encoder/vp9_picklpf.h
+++ b/vp9/encoder/vp9_picklpf.h
@@ -16,7 +16,7 @@
 extern "C" {
 #endif
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 
 struct yv12_buffer_config;
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 3a9f0f5f1..56eb9440c 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -22,7 +22,7 @@
 #include "vp9/common/vp9_reconinter.h"
 #include "vp9/common/vp9_reconintra.h"
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rdopt.h"
 
@@ -212,7 +212,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   struct macroblock_plane *const p = &x->plane[0];
   struct macroblockd_plane *const pd = &xd->plane[0];
-  MB_PREDICTION_MODE this_mode, best_mode = ZEROMV;
+  PREDICTION_MODE this_mode, best_mode = ZEROMV;
   MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
   INTERP_FILTER best_pred_filter = EIGHTTAP;
   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
@@ -235,7 +235,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   unsigned char segment_id = mbmi->segment_id;
   const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
   const int *const rd_thresh_freq_fact = cpi->rd.thresh_freq_fact[bsize];
-  // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame.
+  // Mode index conversion form THR_MODES to PREDICTION_MODE for a ref frame.
   int mode_idx[MB_MODE_COUNT] = {0};
   INTERP_FILTER filter_ref = SWITCHABLE;
   int bsl = mi_width_log2_lookup[bsize];
diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h
index 05ff18762..a9c948d31 100644
--- a/vp9/encoder/vp9_pickmode.h
+++ b/vp9/encoder/vp9_pickmode.h
@@ -11,7 +11,7 @@
 #ifndef VP9_ENCODER_VP9_PICKMODE_H_
 #define VP9_ENCODER_VP9_PICKMODE_H_
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 2b2aa9add..5206bb671 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -15,7 +15,7 @@
 #include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_seg_common.h"
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_rdopt.h"
 
@@ -242,10 +242,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
   }
 
 #if CONFIG_ALPHA
-  x->plane[3].quant = cpi->a_quant[qindex];
-  x->plane[3].quant_shift = cpi->a_quant_shift[qindex];
-  x->plane[3].zbin = cpi->a_zbin[qindex];
-  x->plane[3].round = cpi->a_round[qindex];
+  x->plane[3].quant = quants->a_quant[qindex];
+  x->plane[3].quant_shift = quants->a_quant_shift[qindex];
+  x->plane[3].zbin = quants->a_zbin[qindex];
+  x->plane[3].round = quants->a_round[qindex];
   x->plane[3].zbin_extra = (int16_t)((cm->a_dequant[qindex][1] * zbin) >> 7);
   xd->plane[3].dequant = cm->a_dequant[qindex];
 #endif
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 7a9388300..1835e9ccc 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -11,6 +11,7 @@
 #ifndef VP9_ENCODER_VP9_QUANTIZE_H_
 #define VP9_ENCODER_VP9_QUANTIZE_H_
 
+#include "./vpx_config.h"
 #include "vp9/encoder/vp9_block.h"
 
 #ifdef __cplusplus
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index b8d0ec40d..6ebd9f3fa 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -181,13 +181,13 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
   rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
   rc->buffer_level = rc->bits_off_target;
 
-  if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+  if (cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR) {
     update_layer_buffer_level(&cpi->svc, encoded_frame_size);
   }
 }
 
 void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
-  if (pass == 0 && oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+  if (pass == 0 && oxcf->rc_mode == RC_MODE_CBR) {
     rc->avg_frame_qindex[0] = oxcf->worst_allowed_q;
     rc->avg_frame_qindex[1] = oxcf->worst_allowed_q;
     rc->avg_frame_qindex[2] = oxcf->worst_allowed_q;
@@ -280,7 +280,7 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) {
   } else {
     if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
         !cpi->rc.is_src_frame_alt_ref &&
-        !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
+        !(cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR))
       return cpi->rc.gf_rate_correction_factor;
     else
       return cpi->rc.rate_correction_factor;
@@ -293,7 +293,7 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
   } else {
     if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
         !cpi->rc.is_src_frame_alt_ref &&
-        !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
+        !(cpi->use_svc && cpi->oxcf.rc_mode == RC_MODE_CBR))
       cpi->rc.gf_rate_correction_factor = factor;
     else
       cpi->rc.rate_correction_factor = factor;
@@ -671,7 +671,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
       q = rc->avg_frame_qindex[KEY_FRAME];
     }
     // For constrained quality dont allow Q less than the cq level
-    if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
+    if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) {
       if (q < cq_level)
         q = cq_level;
       if (rc->frames_since_key > 1) {
@@ -688,7 +688,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
       // Constrained quality use slightly lower active best.
       active_best_quality = active_best_quality * 15 / 16;
 
-    } else if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+    } else if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
       if (!cpi->refresh_alt_ref_frame) {
         active_best_quality = cq_level;
       } else {
@@ -708,7 +708,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
           gf_low_motion_minq, gf_high_motion_minq);
     }
   } else {
-    if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+    if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
       active_best_quality = cq_level;
     } else {
       // Use the lower of active_worst_quality and recent/average Q.
@@ -718,7 +718,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
         active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
-      if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
+      if ((oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) &&
           (active_best_quality < cq_level)) {
         active_best_quality = cq_level;
       }
@@ -755,7 +755,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
   }
 #endif
 
-  if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+  if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
     q = active_best_quality;
   // Special case code to try and match quality with forced key frames
   } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
@@ -774,7 +774,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
 #if CONFIG_MULTIPLE_ARF
   // Force the quantizer determined by the coding order pattern.
   if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
-      cpi->oxcf.end_usage != USAGE_CONSTANT_QUALITY) {
+      cpi->oxcf.rc_mode != RC_MODE_CONSTANT_QUALITY) {
     double new_q;
     double current_q = vp9_convert_qindex_to_q(active_worst_quality);
     int level = cpi->this_frame_weight;
@@ -862,7 +862,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
       q = active_worst_quality;
     }
     // For constrained quality dont allow Q less than the cq level
-    if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
+    if (oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) {
       if (q < cq_level)
         q = cq_level;
       if (rc->frames_since_key > 1) {
@@ -879,7 +879,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
       // Constrained quality use slightly lower active best.
       active_best_quality = active_best_quality * 15 / 16;
 
-    } else if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+    } else if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
       if (!cpi->refresh_alt_ref_frame) {
         active_best_quality = cq_level;
       } else {
@@ -899,14 +899,14 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
           gf_low_motion_minq, gf_high_motion_minq);
     }
   } else {
-    if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+    if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
       active_best_quality = cq_level;
     } else {
       active_best_quality = inter_minq[active_worst_quality];
 
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
-      if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
+      if ((oxcf->rc_mode == RC_MODE_CONSTRAINED_QUALITY) &&
           (active_best_quality < cq_level)) {
         active_best_quality = cq_level;
       }
@@ -932,7 +932,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
       qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
                                           active_worst_quality, 2.0);
     } else if (!rc->is_src_frame_alt_ref &&
-               (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
+               (oxcf->rc_mode != RC_MODE_CBR) &&
                (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
       qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
                                           active_worst_quality, 1.75);
@@ -942,7 +942,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
   }
 #endif
 
-  if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
+  if (oxcf->rc_mode == RC_MODE_CONSTANT_QUALITY) {
     q = active_best_quality;
   // Special case code to try and match quality with forced key frames.
   } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) {
@@ -961,7 +961,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
 #if CONFIG_MULTIPLE_ARF
   // Force the quantizer determined by the coding order pattern.
   if (cpi->multi_arf_enabled && (cm->frame_type != KEY_FRAME) &&
-      cpi->oxcf.end_usage != USAGE_CONSTANT_QUALITY) {
+      cpi->oxcf.rc_mode != RC_MODE_CONSTANT_QUALITY) {
     double new_q;
     double current_q = vp9_convert_qindex_to_q(active_worst_quality);
     int level = cpi->this_frame_weight;
@@ -987,7 +987,7 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi,
                              int *bottom_index, int *top_index) {
   int q;
   if (cpi->pass == 0) {
-    if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+    if (cpi->oxcf.rc_mode == RC_MODE_CBR)
       q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index);
     else
       q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index);
@@ -1015,7 +1015,7 @@ void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
                                       int frame_target,
                                       int *frame_under_shoot_limit,
                                       int *frame_over_shoot_limit) {
-  if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
+  if (cpi->oxcf.rc_mode == RC_MODE_CONSTANT_QUALITY) {
     *frame_under_shoot_limit = 0;
     *frame_over_shoot_limit  = INT_MAX;
   } else {
@@ -1090,7 +1090,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
   // Post encode loop adjustment of Q prediction.
   vp9_rc_update_rate_correction_factors(
       cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF ||
-            oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
+            oxcf->rc_mode == RC_MODE_CBR) ? 2 : 0);
 
   // Keep a record of last Q and ambient average Q.
   if (cm->frame_type == KEY_FRAME) {
@@ -1099,7 +1099,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
         ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
   } else if (!rc->is_src_frame_alt_ref &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
-             !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) {
+             !(cpi->use_svc && oxcf->rc_mode == RC_MODE_CBR)) {
     rc->last_q[2] = qindex;
     rc->avg_frame_qindex[2] =
         ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2);
@@ -1215,7 +1215,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
     cm->frame_type = KEY_FRAME;
     rc->this_key_frame_forced = cm->current_video_frame != 0 &&
                                 rc->frames_to_key == 0;
-    rc->frames_to_key = cpi->key_frame_frequency;
+    rc->frames_to_key = cpi->oxcf.key_freq;
     rc->kf_boost = DEFAULT_KF_BOOST;
     rc->source_alt_ref_active = 0;
   } else {
@@ -1247,7 +1247,7 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
   int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
   int target = rc->avg_frame_bandwidth;
   if (svc->number_temporal_layers > 1 &&
-      oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+      oxcf->rc_mode == RC_MODE_CBR) {
     // Note that for layers, avg_frame_bandwidth is the cumulative
     // per-frame-bandwidth. For the target size of this frame, use the
     // layer average frame size (i.e., non-cumulative per-frame-bw).
@@ -1280,7 +1280,7 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
     int kf_boost = 32;
     double framerate = oxcf->framerate;
     if (svc->number_temporal_layers > 1 &&
-        oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+        oxcf->rc_mode == RC_MODE_CBR) {
       // Use the layer framerate for temporal layers CBR mode.
       const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id];
       framerate = lc->framerate;
@@ -1302,15 +1302,15 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
   if ((cm->current_video_frame == 0) ||
       (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       (cpi->oxcf.auto_key && (rc->frames_since_key %
-                              cpi->key_frame_frequency == 0))) {
+          cpi->oxcf.key_freq == 0))) {
     cm->frame_type = KEY_FRAME;
     rc->source_alt_ref_active = 0;
-    if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) {
       target = calc_iframe_target_size_one_pass_cbr(cpi);
     }
   } else {
     cm->frame_type = INTER_FRAME;
-    if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    if (cpi->pass == 0 && cpi->oxcf.rc_mode == RC_MODE_CBR) {
       target = calc_pframe_target_size_one_pass_cbr(cpi);
     }
   }
@@ -1330,7 +1330,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
     cm->frame_type = KEY_FRAME;
     rc->this_key_frame_forced = cm->current_video_frame != 0 &&
                                 rc->frames_to_key == 0;
-    rc->frames_to_key = cpi->key_frame_frequency;
+    rc->frames_to_key = cpi->oxcf.key_freq;
     rc->kf_boost = DEFAULT_KF_BOOST;
     rc->source_alt_ref_active = 0;
     target = calc_iframe_target_size_one_pass_cbr(cpi);
@@ -1415,7 +1415,7 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
   rc->max_gf_interval = 16;
 
   // Extended interval for genuinely static scenes
-  rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
+  rc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1;
 
   // Special conditions when alt ref frame enabled in lagged compress mode
   if (oxcf->play_alternate && oxcf->lag_in_frames) {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 55ae721be..f309aac96 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -33,8 +33,8 @@
 #include "vp9/encoder/vp9_cost.h"
 #include "vp9/encoder/vp9_encodemb.h"
 #include "vp9/encoder/vp9_encodemv.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_mcomp.h"
-#include "vp9/encoder/vp9_onyx_int.h"
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rdopt.h"
@@ -56,7 +56,7 @@
 #define MIN_EARLY_TERM_INDEX    3
 
 typedef struct {
-  MB_PREDICTION_MODE mode;
+  PREDICTION_MODE mode;
   MV_REFERENCE_FRAME ref_frame[2];
 } MODE_DEFINITION;
 
@@ -243,9 +243,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
   cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
 }
 
-static void set_block_thresholds(VP9_COMP *cpi) {
-  const VP9_COMMON *const cm = &cpi->common;
-  RD_OPT *const rd = &cpi->rd;
+static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
   int i, bsize, segment_id;
 
   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
@@ -280,20 +278,21 @@ static void set_block_thresholds(VP9_COMP *cpi) {
 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
+  RD_OPT *const rd = &cpi->rd;
   int i;
 
   vp9_clear_system_state();
 
-  cpi->rd.RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
-  cpi->rd.RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
+  rd->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
+  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
 
-  x->errorperbit = cpi->rd.RDMULT / RD_MULT_EPB_RATIO;
+  x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
   x->errorperbit += (x->errorperbit == 0);
 
   x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                          cm->frame_type != KEY_FRAME) ? 0 : 1;
 
-  set_block_thresholds(cpi);
+  set_block_thresholds(cm, rd);
 
   if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
     fill_token_costs(x->token_costs, cm->fc.coef_probs);
@@ -1022,8 +1021,8 @@ static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
 }
 
 
-static int conditional_skipintra(MB_PREDICTION_MODE mode,
-                                 MB_PREDICTION_MODE best_intra_mode) {
+static int conditional_skipintra(PREDICTION_MODE mode,
+                                 PREDICTION_MODE best_intra_mode) {
   if (mode == D117_PRED &&
       best_intra_mode != V_PRED &&
       best_intra_mode != D135_PRED)
@@ -1044,13 +1043,13 @@ static int conditional_skipintra(MB_PREDICTION_MODE mode,
 }
 
 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
-                                     MB_PREDICTION_MODE *best_mode,
+                                     PREDICTION_MODE *best_mode,
                                      const int *bmode_costs,
                                      ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                      int *bestrate, int *bestratey,
                                      int64_t *bestdistortion,
                                      BLOCK_SIZE bsize, int64_t rd_thresh) {
-  MB_PREDICTION_MODE mode;
+  PREDICTION_MODE mode;
   MACROBLOCKD *const xd = &x->e_mbd;
   int64_t best_rd = rd_thresh;
 
@@ -1196,13 +1195,13 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
   // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
-      MB_PREDICTION_MODE best_mode = DC_PRED;
+      PREDICTION_MODE best_mode = DC_PRED;
       int r = INT_MAX, ry = INT_MAX;
       int64_t d = INT64_MAX, this_rd = INT64_MAX;
       i = idy * 2 + idx;
       if (cpi->common.frame_type == KEY_FRAME) {
-        const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
-        const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
+        const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
+        const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
 
         bmode_costs  = mb->y_mode_costs[A][L];
       }
@@ -1243,8 +1242,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       BLOCK_SIZE bsize,
                                       int64_t tx_cache[TX_MODES],
                                       int64_t best_rd) {
-  MB_PREDICTION_MODE mode;
-  MB_PREDICTION_MODE mode_selected = DC_PRED;
+  PREDICTION_MODE mode;
+  PREDICTION_MODE mode_selected = DC_PRED;
   MACROBLOCKD *const xd = &x->e_mbd;
   MODE_INFO *const mic = xd->mi[0];
   int this_rate, this_rate_tokenonly, s;
@@ -1267,8 +1266,8 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
       continue;
 
     if (cpi->common.frame_type == KEY_FRAME) {
-      const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
-      const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
+      const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
+      const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
 
       bmode_costs = x->y_mode_costs[A][L];
     }
@@ -1362,8 +1361,8 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                        int64_t *distortion, int *skippable,
                                        BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
   MACROBLOCKD *xd = &x->e_mbd;
-  MB_PREDICTION_MODE mode;
-  MB_PREDICTION_MODE mode_selected = DC_PRED;
+  PREDICTION_MODE mode;
+  PREDICTION_MODE mode_selected = DC_PRED;
   int64_t best_rd = INT64_MAX, this_rd;
   int this_rate_tokenonly, this_rate, s;
   int64_t this_distortion, this_sse;
@@ -1435,7 +1434,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                                  BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                  int *rate_uv, int *rate_uv_tokenonly,
                                  int64_t *dist_uv, int *skip_uv,
-                                 MB_PREDICTION_MODE *mode_uv) {
+                                 PREDICTION_MODE *mode_uv) {
   MACROBLOCK *const x = &cpi->mb;
 
   // Use an estimated rd for uv_intra based on DC_PRED if the
@@ -1453,7 +1452,7 @@ static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
   *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
 }
 
-static int cost_mv_ref(const VP9_COMP *cpi, MB_PREDICTION_MODE mode,
+static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
                        int mode_context) {
   const MACROBLOCK *const x = &cpi->mb;
   const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id;
@@ -1475,7 +1474,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 int *rate_mv);
 
 static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
-                                MB_PREDICTION_MODE mode, int_mv this_mv[2],
+                                PREDICTION_MODE mode, int_mv this_mv[2],
                                 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                                 int_mv seg_mvs[MAX_REF_FRAMES],
                                 int_mv *best_ref_mv[2], const int *mvjcost,
@@ -1628,7 +1627,7 @@ typedef struct {
   int64_t d;
   int64_t sse;
   int segment_yrate;
-  MB_PREDICTION_MODE modes[4];
+  PREDICTION_MODE modes[4];
   SEG_RDSTAT rdstat[4][INTER_MODES];
   int mvthresh;
 } BEST_SEG_INFO;
@@ -1726,7 +1725,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int mode_idx;
   int k, br = 0, idx, idy;
   int64_t bd = 0, block_sse = 0;
-  MB_PREDICTION_MODE this_mode;
+  PREDICTION_MODE this_mode;
   VP9_COMMON *cm = &cpi->common;
   struct macroblock_plane *const p = &x->plane[0];
   struct macroblockd_plane *const pd = &xd->plane[0];
@@ -1770,7 +1769,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
       int_mv mode_mv[MB_MODE_COUNT][2];
       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
-      MB_PREDICTION_MODE mode_selected = ZEROMV;
+      PREDICTION_MODE mode_selected = ZEROMV;
       int64_t best_rd = INT64_MAX;
       const int i = idy * 2 + idx;
       int ref;
@@ -2178,12 +2177,12 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
   x->pred_mv_sad[ref_frame] = best_sad;
 }
 
-static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
+static void estimate_ref_frame_costs(const VP9_COMMON *cm,
+                                     const MACROBLOCKD *xd,
+                                     int segment_id,
                                      unsigned int *ref_costs_single,
                                      unsigned int *ref_costs_comp,
                                      vp9_prob *comp_mode_p) {
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
   int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
                                              SEG_LVL_REF_FRAME);
   if (seg_ref_active) {
@@ -2348,7 +2347,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                  int mi_row, int mi_col,
                                  int_mv *tmp_mv, int *rate_mv) {
   MACROBLOCKD *xd = &x->e_mbd;
-  VP9_COMMON *cm = &cpi->common;
+  const VP9_COMMON *cm = &cpi->common;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
   int bestsme = INT_MAX;
@@ -2386,7 +2385,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
 
   // Work out the size of the first step in the mv step search.
   // 0 here is maximum length first step. 1 is MAX >> 1 etc.
-  if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
+  if (cpi->sf.auto_mv_step_size && cm->show_frame) {
     // Take wtd average of the step_params based on the last frame's
     // max mv magnitude and that based on the best ref mvs of the current
     // block for the given reference.
@@ -2397,7 +2396,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
-      cpi->common.show_frame) {
+      cm->show_frame) {
     int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
                                                        b_width_log2(bsize)));
     step_param = MAX(step_param, boffset);
@@ -2412,7 +2411,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
     if (tlevel < 5)
       step_param += 2;
 
-    for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) {
+    for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
       if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
         x->pred_mv[ref].as_int = 0;
         tmp_mv->as_int = INVALID_MV;
@@ -2496,7 +2495,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
 
-  if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
+  if (cpi->sf.adaptive_motion_search && cm->show_frame)
     x->pred_mv[ref].as_int = tmp_mv->as_int;
 
   if (scaled_ref_frame) {
@@ -3104,13 +3103,13 @@ static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
 // combination that wins out.
 static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
                                   int best_mode_index) {
-  if (cpi->sf.adaptive_rd_thresh) {
+  if (cpi->sf.adaptive_rd_thresh > 0) {
     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
-    int mode_index;
-    for (mode_index = 0; mode_index < top_mode; ++mode_index) {
-      int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode_index];
+    int mode;
+    for (mode = 0; mode < top_mode; ++mode) {
+      int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];
 
-      if (mode_index == best_mode_index) {
+      if (mode == best_mode_index) {
         *fact -= (*fact >> 3);
       } else {
         *fact = MIN(*fact + RD_THRESH_INC,
@@ -3133,7 +3132,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
-  MB_PREDICTION_MODE this_mode;
+  PREDICTION_MODE this_mode;
   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
   unsigned char segment_id = mbmi->segment_id;
   int comp_pred, i;
@@ -3155,13 +3154,13 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   vp9_prob comp_mode_p;
   int64_t best_intra_rd = INT64_MAX;
   int64_t best_inter_rd = INT64_MAX;
-  MB_PREDICTION_MODE best_intra_mode = DC_PRED;
+  PREDICTION_MODE best_intra_mode = DC_PRED;
   MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
   INTERP_FILTER tmp_best_filter = SWITCHABLE;
   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
   int64_t dist_uv[TX_SIZES];
   int skip_uv[TX_SIZES];
-  MB_PREDICTION_MODE mode_uv[TX_SIZES];
+  PREDICTION_MODE mode_uv[TX_SIZES];
   int64_t mode_distortions[MB_MODE_COUNT] = {-1};
   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
   const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
@@ -3178,7 +3177,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
-  estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
+  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                            &comp_mode_p);
 
   for (i = 0; i < REFERENCE_MODES; ++i)
@@ -3775,7 +3774,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
   int rate_uv_intra, rate_uv_tokenonly;
   int64_t dist_uv;
   int skip_uv;
-  MB_PREDICTION_MODE mode_uv = DC_PRED;
+  PREDICTION_MODE mode_uv = DC_PRED;
   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
   int_mv seg_mvs[4][MAX_REF_FRAMES];
   b_mode_info best_bmodes[4];
@@ -3792,7 +3791,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
       seg_mvs[i][j].as_int = INVALID_MV;
   }
 
-  estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
+  estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
                            &comp_mode_p);
 
   for (i = 0; i < REFERENCE_MODES; ++i)
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index e48566499..cd622d608 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -11,7 +11,7 @@
 #ifndef VP9_ENCODER_VP9_RDOPT_H_
 #define VP9_ENCODER_VP9_RDOPT_H_
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index 9d3e6dc12..7537d1b52 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -29,18 +29,6 @@ void vp9_disable_segmentation(struct segmentation *seg) {
   seg->enabled = 0;
 }
 
-void vp9_set_segmentation_map(VP9_COMP *cpi, unsigned char *segmentation_map) {
-  struct segmentation *const seg = &cpi->common.seg;
-
-  // Copy in the new segmentation map
-  vpx_memcpy(cpi->segmentation_map, segmentation_map,
-             (cpi->common.mi_rows * cpi->common.mi_cols));
-
-  // Signal that the map should be updated.
-  seg->update_map = 1;
-  seg->update_data = 1;
-}
-
 void vp9_set_segment_data(struct segmentation *seg,
                           signed char *feature_data,
                           unsigned char abs_delta) {
diff --git a/vp9/encoder/vp9_segmentation.h b/vp9/encoder/vp9_segmentation.h
index 66c51a21b..50dd562c8 100644
--- a/vp9/encoder/vp9_segmentation.h
+++ b/vp9/encoder/vp9_segmentation.h
@@ -13,7 +13,7 @@
 #define VP9_ENCODER_VP9_SEGMENTATION_H_
 
 #include "vp9/common/vp9_blockd.h"
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -28,9 +28,6 @@ void vp9_disable_segfeature(struct segmentation *seg,
 void vp9_clear_segdata(struct segmentation *seg,
                        int segment_id,
                        SEG_LVL_FEATURES feature_id);
-// Valid values for a segment are 0 to 3
-// Segmentation map is arrange as [Rows][Columns]
-void vp9_set_segmentation_map(VP9_COMP *cpi, unsigned char *segmentation_map);
 
 // The values given for each segment can be either deltas (from the default
 // value chosen for the frame) or absolute values.
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 96916c2e1..93e23eee2 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -10,7 +10,7 @@
 
 #include <limits.h>
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_speed_features.h"
 
 enum {
@@ -80,9 +80,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
   }
 
   if (speed >= 2) {
-    sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
-                                                        : USE_LARGESTALL;
-
     if (MIN(cm->width, cm->height) >= 720)
       sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
                                               : DISABLE_ALL_INTER_SPLIT;
@@ -104,6 +101,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
   }
 
   if (speed >= 3) {
+    sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
+                                                        : USE_LARGESTALL;
     if (MIN(cm->width, cm->height) >= 720)
       sf->disable_split_mask = DISABLE_ALL_SPLIT;
     else
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index e408200b4..2379f35b5 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -10,7 +10,7 @@
 
 #include <math.h>
 
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_svc_layercontext.h"
 
 void vp9_init_layer_context(VP9_COMP *const cpi) {
@@ -153,7 +153,7 @@ void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) {
                                    oxcf->two_pass_vbrmax_section) / 100);
   lrc->max_gf_interval = 16;
 
-  lrc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
+  lrc->static_scene_max_gf_interval = cpi->oxcf.key_freq >> 1;
 
   if (oxcf->play_alternate && oxcf->lag_in_frames) {
     if (lrc->max_gf_interval > oxcf->lag_in_frames - 1)
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index a5694161c..ca9339155 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -19,7 +19,7 @@
 #include "vp9/encoder/vp9_extend.h"
 #include "vp9/encoder/vp9_firstpass.h"
 #include "vp9/encoder/vp9_mcomp.h"
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_segmentation.h"
@@ -345,74 +345,33 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
 void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
   VP9_COMMON *const cm = &cpi->common;
   int frame = 0;
-  int frames_to_blur_backward = 0;
-  int frames_to_blur_forward = 0;
   int frames_to_blur = 0;
   int start_frame = 0;
   int strength = cpi->active_arnr_strength;
-  int blur_type = cpi->oxcf.arnr_type;
   int max_frames = cpi->active_arnr_frames;
-  const int num_frames_backward = distance;
-  const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
-                               - (num_frames_backward + 1);
+  int frames_to_blur_backward = distance;
+  int frames_to_blur_forward = vp9_lookahead_depth(cpi->lookahead)
+                                   - (distance + 1);
   struct scale_factors sf;
 
-  switch (blur_type) {
-    case 1:
-      // Backward Blur
-      frames_to_blur_backward = num_frames_backward;
+  // Determine which input frames to filter.
+  if (frames_to_blur_forward > frames_to_blur_backward)
+    frames_to_blur_forward = frames_to_blur_backward;
 
-      if (frames_to_blur_backward >= max_frames)
-        frames_to_blur_backward = max_frames - 1;
+  if (frames_to_blur_backward > frames_to_blur_forward)
+    frames_to_blur_backward = frames_to_blur_forward;
 
-      frames_to_blur = frames_to_blur_backward + 1;
-      break;
+  // When max_frames is even we have 1 more frame backward than forward
+  if (frames_to_blur_forward > (max_frames - 1) / 2)
+    frames_to_blur_forward = (max_frames - 1) / 2;
 
-    case 2:
-      // Forward Blur
-      frames_to_blur_forward = num_frames_forward;
+  if (frames_to_blur_backward > (max_frames / 2))
+    frames_to_blur_backward = max_frames / 2;
 
-      if (frames_to_blur_forward >= max_frames)
-        frames_to_blur_forward = max_frames - 1;
-
-      frames_to_blur = frames_to_blur_forward + 1;
-      break;
-
-    case 3:
-    default:
-      // Center Blur
-      frames_to_blur_forward = num_frames_forward;
-      frames_to_blur_backward = num_frames_backward;
-
-      if (frames_to_blur_forward > frames_to_blur_backward)
-        frames_to_blur_forward = frames_to_blur_backward;
-
-      if (frames_to_blur_backward > frames_to_blur_forward)
-        frames_to_blur_backward = frames_to_blur_forward;
-
-      // When max_frames is even we have 1 more frame backward than forward
-      if (frames_to_blur_forward > (max_frames - 1) / 2)
-        frames_to_blur_forward = ((max_frames - 1) / 2);
-
-      if (frames_to_blur_backward > (max_frames / 2))
-        frames_to_blur_backward = (max_frames / 2);
-
-      frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
-      break;
-  }
+  frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
 
   start_frame = distance + frames_to_blur_forward;
 
-#ifdef DEBUGFWG
-  // DEBUG FWG
-  printf(
-      "max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d "
-      "start:%d",
-      max_frames, num_frames_backward, num_frames_forward, frames_to_blur,
-      frames_to_blur_backward, frames_to_blur_forward, cpi->source_encode_index,
-      cpi->last_alt_ref_sei, start_frame);
-#endif
-
   // Setup scaling factors. Scaling on each of the arnr frames is not supported
   vp9_setup_scale_factors_for_frame(&sf,
       get_frame_new_buffer(cm)->y_crop_width,
@@ -421,7 +380,7 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
 
   // Setup frame pointers, NULL indicates frame not included in filter
   vp9_zero(cpi->frames);
-  for (frame = 0; frame < frames_to_blur; frame++) {
+  for (frame = 0; frame < frames_to_blur; ++frame) {
     int which_buffer = start_frame - frame;
     struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead,
                                                      which_buffer);
@@ -435,11 +394,11 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
 void vp9_configure_arnr_filter(VP9_COMP *cpi,
                                const unsigned int frames_to_arnr,
                                const int group_boost) {
+  int q;
   int half_gf_int;
   int frames_after_arf;
-  int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
-  int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
-  int q;
+  int frames_bwd;
+  int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
 
   // Define the arnr filter width for this group of frames. We only
   // filter frames that lie within a distance of half the GF interval
@@ -451,47 +410,26 @@ void vp9_configure_arnr_filter(VP9_COMP *cpi,
   frames_after_arf = vp9_lookahead_depth(cpi->lookahead)
       - frames_to_arnr - 1;
 
-  switch (cpi->oxcf.arnr_type) {
-    case 1:  // Backward filter
-      frames_fwd = 0;
-      if (frames_bwd > half_gf_int)
-        frames_bwd = half_gf_int;
-      break;
-
-    case 2:  // Forward filter
-      if (frames_fwd > half_gf_int)
-        frames_fwd = half_gf_int;
-      if (frames_fwd > frames_after_arf)
-        frames_fwd = frames_after_arf;
-      frames_bwd = 0;
-      break;
-
-    case 3:  // Centered filter
-    default:
-      frames_fwd >>= 1;
-      if (frames_fwd > frames_after_arf)
-        frames_fwd = frames_after_arf;
-      if (frames_fwd > half_gf_int)
-        frames_fwd = half_gf_int;
-
-      frames_bwd = frames_fwd;
-
-      // For even length filter there is one more frame backward
-      // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
-      if (frames_bwd < half_gf_int)
-        frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1;
-      break;
-  }
+  if (frames_fwd > frames_after_arf)
+    frames_fwd = frames_after_arf;
+  if (frames_fwd > half_gf_int)
+    frames_fwd = half_gf_int;
+
+  frames_bwd = frames_fwd;
+
+  // For even length filter there is one more frame backward
+  // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
+  if (frames_bwd < half_gf_int)
+    frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1;
 
   cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
 
   // Adjust the strength based on active max q
   if (cpi->common.current_video_frame > 1)
-    q = ((int)vp9_convert_qindex_to_q(
-        cpi->rc.avg_frame_qindex[INTER_FRAME]));
+    q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME]));
   else
-    q = ((int)vp9_convert_qindex_to_q(
-        cpi->rc.avg_frame_qindex[KEY_FRAME]));
+    q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME]));
+
   if (q > 16) {
     cpi->active_arnr_strength = cpi->oxcf.arnr_strength;
   } else {
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 291ccb37e..8ce98d938 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -20,7 +20,7 @@
 #include "vp9/common/vp9_seg_common.h"
 
 #include "vp9/encoder/vp9_cost.h"
-#include "vp9/encoder/vp9_onyx_int.h"
+#include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_tokenize.h"
 
 static TOKENVALUE dct_value_tokens[DCT_MAX_VALUE * 2];
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 71867a938..520ee443a 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -18,63 +18,34 @@
 
 #include "vp9/encoder/vp9_variance.h"
 
-void variance(const uint8_t *src_ptr,
-              int  source_stride,
-              const uint8_t *ref_ptr,
-              int  recon_stride,
-              int  w,
-              int  h,
-              unsigned int *sse,
-              int *sum) {
+void variance(const uint8_t *a, int  a_stride,
+              const uint8_t *b, int  b_stride,
+              int  w, int  h, unsigned int *sse, int *sum) {
   int i, j;
-  int diff;
 
   *sum = 0;
   *sse = 0;
 
   for (i = 0; i < h; i++) {
     for (j = 0; j < w; j++) {
-      diff = src_ptr[j] - ref_ptr[j];
+      const int diff = a[j] - b[j];
       *sum += diff;
       *sse += diff * diff;
     }
 
-    src_ptr += source_stride;
-    ref_ptr += recon_stride;
+    a += a_stride;
+    b += b_stride;
   }
 }
 
-/****************************************************************************
- *
- *  ROUTINE       : filter_block2d_bil_first_pass
- *
- *  INPUTS        : uint8_t  *src_ptr          : Pointer to source block.
- *                  uint32_t src_pixels_per_line : Stride of input block.
- *                  uint32_t pixel_step        : Offset between filter input
- *                                               samples (see notes).
- *                  uint32_t output_height     : Input block height.
- *                  uint32_t output_width      : Input block width.
- *                  int32_t  *vp9_filter       : Array of 2 bi-linear filter
- *                                               taps.
- *
- *  OUTPUTS       : int32_t *output_ptr        : Pointer to filtered block.
- *
- *  RETURNS       : void
- *
- *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
- *                  either horizontal or vertical direction to produce the
- *                  filtered output block. Used to implement first-pass
- *                  of 2-D separable filter.
- *
- *  SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
- *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
- *                  pixel_step defines whether the filter is applied
- *                  horizontally (pixel_step=1) or vertically (pixel_step=
- *                  stride).
- *                  It defines the offset required to move from one input
- *                  to the next.
- *
- ****************************************************************************/
+// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// first-pass of 2-D separable filter.
+//
+// Produces int32_t output to retain precision for next pass. Two filter taps
+// should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is
+// applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It
+// defines the offset required to move from one input to the next.
 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
                                               uint16_t *output_ptr,
                                               unsigned int src_pixels_per_line,
@@ -99,38 +70,14 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
   }
 }
 
-/****************************************************************************
- *
- *  ROUTINE       : filter_block2d_bil_second_pass
- *
- *  INPUTS        : int32_t  *src_ptr          : Pointer to source block.
- *                  uint32_t src_pixels_per_line : Stride of input block.
- *                  uint32_t pixel_step        : Offset between filter input
- *                                               samples (see notes).
- *                  uint32_t output_height     : Input block height.
- *                  uint32_t output_width      : Input block width.
- *                  int32_t  *vp9_filter       : Array of 2 bi-linear filter
- *                                               taps.
- *
- *  OUTPUTS       : uint16_t *output_ptr       : Pointer to filtered block.
- *
- *  RETURNS       : void
- *
- *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block in
- *                  either horizontal or vertical direction to produce the
- *                  filtered output block. Used to implement second-pass
- *                  of 2-D separable filter.
- *
- *  SPECIAL NOTES : Requires 32-bit input as produced by
- *                  filter_block2d_bil_first_pass.
- *                  Two filter taps should sum to VP9_FILTER_WEIGHT.
- *                  pixel_step defines whether the filter is applied
- *                  horizontally (pixel_step=1) or vertically (pixel_step=
- *                  stride).
- *                  It defines the offset required to move from one input
- *                  to the next.
- *
- ****************************************************************************/
+// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// second-pass of 2-D separable filter.
+//
+// Requires 32-bit input as produced by filter_block2d_bil_first_pass. Two
+// filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the
+// filter is applied horizontally (pixel_step=1) or vertically (pixel_step=
+// stride). It defines the offset required to move from one input to the next.
 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
                                                uint8_t *output_ptr,
                                                unsigned int src_pixels_per_line,
@@ -156,266 +103,62 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
   unsigned int i, sum = 0;
 
-  for (i = 0; i < 256; i++) {
-    sum += (src_ptr[i] * src_ptr[i]);
-  }
+  for (i = 0; i < 256; i++)
+    sum += src_ptr[i] * src_ptr[i];
 
   return sum;
 }
 
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
-                                 int  source_stride,
-                                 const uint8_t *ref_ptr,
-                                 int  recon_stride,
-                                 unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 11));
+#define VAR(W, H) \
+unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
+                                       const uint8_t *b, int b_stride, \
+                                       unsigned int *sse) { \
+  unsigned int var; \
+  int avg; \
+\
+  variance(a, a_stride, b, b_stride, W, H, &var, &avg); \
+  *sse = var; \
+  return var - (((int64_t)avg * avg) / (W * H)); \
+}
+
+#define SUBPIX_VAR(W, H) \
+unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
+  const uint8_t *src, int  src_stride, \
+  int xoffset, int  yoffset, \
+  const uint8_t *dst, int dst_stride, \
+  unsigned int *sse) { \
+  uint16_t fdata3[(H + 1) * W]; \
+  uint8_t temp2[H * W]; \
+\
+  var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
+                                    BILINEAR_FILTERS_2TAP(xoffset)); \
+  var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                     BILINEAR_FILTERS_2TAP(yoffset)); \
+\
+  return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
+}
+
+#define SUBPIX_AVG_VAR(W, H) \
+unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
+  const uint8_t *src, int  src_stride, \
+  int xoffset, int  yoffset, \
+  const uint8_t *dst, int dst_stride, \
+  unsigned int *sse, \
+  const uint8_t *second_pred) { \
+  uint16_t fdata3[(H + 1) * W]; \
+  uint8_t temp2[H * W]; \
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, H * W); \
+\
+  var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
+                                    BILINEAR_FILTERS_2TAP(xoffset)); \
+  var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                     BILINEAR_FILTERS_2TAP(yoffset)); \
+\
+  vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
+\
+  return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
 }
 
-unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
-                                           int  src_pixels_per_line,
-                                           int  xoffset,
-                                           int  yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
-  uint8_t temp2[68 * 64];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 33, 64, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
-
-  return vp9_variance64x32(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
-                                               int  src_pixels_per_line,
-                                               int  xoffset,
-                                               int  yoffset,
-                                               const uint8_t *dst_ptr,
-                                               int dst_pixels_per_line,
-                                               unsigned int *sse,
-                                               const uint8_t *second_pred) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
-  uint8_t temp2[68 * 64];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 33, 64, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64);
-  return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
-                                 int  source_stride,
-                                 const uint8_t *ref_ptr,
-                                 int  recon_stride,
-                                 unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 11));
-}
-
-unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
-                                           int  src_pixels_per_line,
-                                           int  xoffset,
-                                           int  yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
-  uint8_t temp2[68 * 64];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 65, 32, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
-
-  return vp9_variance32x64(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
-                                               int  src_pixels_per_line,
-                                               int  xoffset,
-                                               int  yoffset,
-                                               const uint8_t *dst_ptr,
-                                               int dst_pixels_per_line,
-                                               unsigned int *sse,
-                                               const uint8_t *second_pred) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
-  uint8_t temp2[68 * 64];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 65, 32, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32);
-  return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
-                                 int  source_stride,
-                                 const uint8_t *ref_ptr,
-                                 int  recon_stride,
-                                 unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 9));
-}
-
-unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
-                                           int  src_pixels_per_line,
-                                           int  xoffset,
-                                           int  yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
-  uint8_t temp2[36 * 32];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 17, 32, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
-
-  return vp9_variance32x16(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
-                                               int  src_pixels_per_line,
-                                               int  xoffset,
-                                               int  yoffset,
-                                               const uint8_t *dst_ptr,
-                                               int dst_pixels_per_line,
-                                               unsigned int *sse,
-                                               const uint8_t *second_pred) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
-  uint8_t temp2[36 * 32];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 17, 32, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32);
-  return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
-                                 int  source_stride,
-                                 const uint8_t *ref_ptr,
-                                 int  recon_stride,
-                                 unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 9));
-}
-
-unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
-                                           int  src_pixels_per_line,
-                                           int  xoffset,
-                                           int  yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
-  uint8_t temp2[36 * 32];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 33, 16, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
-
-  return vp9_variance16x32(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
-                                               int  src_pixels_per_line,
-                                               int  xoffset,
-                                               int  yoffset,
-                                               const uint8_t *dst_ptr,
-                                               int dst_pixels_per_line,
-                                               unsigned int *sse,
-                                               const uint8_t *second_pred) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
-  uint8_t temp2[36 * 32];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 33, 16, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16);
-  return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
-                                 int  source_stride,
-                                 const uint8_t *ref_ptr,
-                                 int  recon_stride,
-                                 unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 12));
-}
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
-                                 int  source_stride,
-                                 const uint8_t *ref_ptr,
-                                 int  recon_stride,
-                                 unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 10));
-}
 
 void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
                              const uint8_t *ref_ptr, int ref_stride,
@@ -423,104 +166,12 @@ void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
   variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
 }
 
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
-                                 int  source_stride,
-                                 const uint8_t *ref_ptr,
-                                 int  recon_stride,
-                                 unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 8));
-}
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
-                                int  source_stride,
-                                const uint8_t *ref_ptr,
-                                int  recon_stride,
-                                unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 7));
-}
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
-                                int  source_stride,
-                                const uint8_t *ref_ptr,
-                                int  recon_stride,
-                                unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 7));
-}
-
 void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                        const uint8_t *ref_ptr, int ref_stride,
                        unsigned int *sse, int *sum) {
   variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
 }
 
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
-                               int  source_stride,
-                               const uint8_t *ref_ptr,
-                               int  recon_stride,
-                               unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 6));
-}
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
-                               int  source_stride,
-                               const uint8_t *ref_ptr,
-                               int  recon_stride,
-                               unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 5));
-}
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
-                               int  source_stride,
-                               const uint8_t *ref_ptr,
-                               int  recon_stride,
-                               unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 5));
-}
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
-                               int  source_stride,
-                               const uint8_t *ref_ptr,
-                               int  recon_stride,
-                               unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 4));
-}
-
-
 unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
                             int  source_stride,
                             const uint8_t *ref_ptr,
@@ -573,233 +224,57 @@ unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
   return var;
 }
 
+VAR(4, 4)
+SUBPIX_VAR(4, 4)
+SUBPIX_AVG_VAR(4, 4)
 
-unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
-                                         int  src_pixels_per_line,
-                                         int  xoffset,
-                                         int  yoffset,
-                                         const uint8_t *dst_ptr,
-                                         int dst_pixels_per_line,
-                                         unsigned int *sse) {
-  uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
+VAR(4, 8)
+SUBPIX_VAR(4, 8)
+SUBPIX_AVG_VAR(4, 8)
 
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+VAR(8, 4)
+SUBPIX_VAR(8, 4)
+SUBPIX_AVG_VAR(8, 4)
 
-  // First filter 1d Horizontal
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 5, 4, hfilter);
+VAR(8, 8)
+SUBPIX_VAR(8, 8)
+SUBPIX_AVG_VAR(8, 8)
 
-  // Now filter Verticaly
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
+VAR(8, 16)
+SUBPIX_VAR(8, 16)
+SUBPIX_AVG_VAR(8, 16)
 
-  return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
-}
+VAR(16, 8)
+SUBPIX_VAR(16, 8)
+SUBPIX_AVG_VAR(16, 8)
 
-unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
-                                             int  src_pixels_per_line,
-                                             int  xoffset,
-                                             int  yoffset,
-                                             const uint8_t *dst_ptr,
-                                             int dst_pixels_per_line,
-                                             unsigned int *sse,
-                                             const uint8_t *second_pred) {
-  uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);  // compound pred buffer
-  uint16_t fdata3[5 * 4];  // Temp data buffer used in filtering
+VAR(16, 16)
+SUBPIX_VAR(16, 16)
+SUBPIX_AVG_VAR(16, 16)
 
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+VAR(16, 32)
+SUBPIX_VAR(16, 32)
+SUBPIX_AVG_VAR(16, 32)
 
-  // First filter 1d Horizontal
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 5, 4, hfilter);
+VAR(32, 16)
+SUBPIX_VAR(32, 16)
+SUBPIX_AVG_VAR(32, 16)
 
-  // Now filter Verticaly
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 4,  4,  4,  4, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
-  return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
-                                         int  src_pixels_per_line,
-                                         int  xoffset,
-                                         int  yoffset,
-                                         const uint8_t *dst_ptr,
-                                         int dst_pixels_per_line,
-                                         unsigned int *sse) {
-  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 9, 8, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
-
-  return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
-                                             int  src_pixels_per_line,
-                                             int  xoffset,
-                                             int  yoffset,
-                                             const uint8_t *dst_ptr,
-                                             int dst_pixels_per_line,
-                                             unsigned int *sse,
-                                             const uint8_t *second_pred) {
-  uint16_t fdata3[9 * 8];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 9, 8, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
-  return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
-                                           int  src_pixels_per_line,
-                                           int  xoffset,
-                                           int  yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  uint16_t fdata3[17 * 16];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 17, 16, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
+VAR(32, 32)
+SUBPIX_VAR(32, 32)
+SUBPIX_AVG_VAR(32, 32)
 
-  return vp9_variance16x16(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
-                                               int  src_pixels_per_line,
-                                               int  xoffset,
-                                               int  yoffset,
-                                               const uint8_t *dst_ptr,
-                                               int dst_pixels_per_line,
-                                               unsigned int *sse,
-                                               const uint8_t *second_pred) {
-  uint16_t fdata3[17 * 16];
-  uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 17, 16, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter);
-
-  vp9_comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16);
-  return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
-                                           int  src_pixels_per_line,
-                                           int  xoffset,
-                                           int  yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
-  uint8_t temp2[68 * 64];
-  const int16_t *hfilter, *vfilter;
+VAR(32, 64)
+SUBPIX_VAR(32, 64)
+SUBPIX_AVG_VAR(32, 64)
 
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+VAR(64, 32)
+SUBPIX_VAR(64, 32)
+SUBPIX_AVG_VAR(64, 32)
 
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 65, 64, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
-
-  return vp9_variance64x64(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
-                                               int  src_pixels_per_line,
-                                               int  xoffset,
-                                               int  yoffset,
-                                               const uint8_t *dst_ptr,
-                                               int dst_pixels_per_line,
-                                               unsigned int *sse,
-                                               const uint8_t *second_pred) {
-  uint16_t fdata3[65 * 64];  // Temp data buffer used in filtering
-  uint8_t temp2[68 * 64];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 65, 64, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64);
-  return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
-                                           int  src_pixels_per_line,
-                                           int  xoffset,
-                                           int  yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
-  uint8_t temp2[36 * 32];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 33, 32, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
-
-  return vp9_variance32x32(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
-                                               int  src_pixels_per_line,
-                                               int  xoffset,
-                                               int  yoffset,
-                                               const uint8_t *dst_ptr,
-                                               int dst_pixels_per_line,
-                                               unsigned int *sse,
-                                               const uint8_t *second_pred) {
-  uint16_t fdata3[33 * 32];  // Temp data buffer used in filtering
-  uint8_t temp2[36 * 32];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 33, 32, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32);
-  return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse);
-}
+VAR(64, 64)
+SUBPIX_VAR(64, 64)
+SUBPIX_AVG_VAR(64, 64)
 
 unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                               int  source_stride,
@@ -882,233 +357,14 @@ unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                        ref_ptr, recon_stride, sse);
 }
 
-unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
-                                      int  src_pixels_per_line,
-                                      int  xoffset,
-                                      int  yoffset,
-                                      const uint8_t *dst_ptr,
-                                      int dst_pixels_per_line,
-                                      unsigned int *sse) {
-  vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
-                                xoffset, yoffset, dst_ptr,
-                                dst_pixels_per_line, sse);
-  return *sse;
-}
-
-unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
-                                      int  src_pixels_per_line,
-                                      int  xoffset,
-                                      int  yoffset,
-                                      const uint8_t *dst_ptr,
-                                      int dst_pixels_per_line,
-                                      unsigned int *sse) {
-  vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
-                                xoffset, yoffset, dst_ptr,
-                                dst_pixels_per_line, sse);
-  return *sse;
-}
-
-unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
-                                      int  src_pixels_per_line,
-                                      int  xoffset,
-                                      int  yoffset,
-                                      const uint8_t *dst_ptr,
-                                      int dst_pixels_per_line,
-                                      unsigned int *sse) {
-  vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
-                                xoffset, yoffset, dst_ptr,
-                                dst_pixels_per_line, sse);
-  return *sse;
-}
-
-unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
-                                          int  src_pixels_per_line,
-                                          int  xoffset,
-                                          int  yoffset,
-                                          const uint8_t *dst_ptr,
-                                          int dst_pixels_per_line,
-                                          unsigned int *sse) {
-  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 9, 16, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
-
-  return vp9_variance16x8(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
-                                              int  src_pixels_per_line,
-                                              int  xoffset,
-                                              int  yoffset,
-                                              const uint8_t *dst_ptr,
-                                              int dst_pixels_per_line,
-                                              unsigned int *sse,
-                                              const uint8_t *second_pred) {
-  uint16_t fdata3[16 * 9];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 9, 16, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16);
-  return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
-                                          int  src_pixels_per_line,
-                                          int  xoffset,
-                                          int  yoffset,
-                                          const uint8_t *dst_ptr,
-                                          int dst_pixels_per_line,
-                                          unsigned int *sse) {
-  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 17, 8, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
-
-  return vp9_variance8x16(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
-                                              int  src_pixels_per_line,
-                                              int  xoffset,
-                                              int  yoffset,
-                                              const uint8_t *dst_ptr,
-                                              int dst_pixels_per_line,
-                                              unsigned int *sse,
-                                              const uint8_t *second_pred) {
-  uint16_t fdata3[9 * 16];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 17, 8, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8);
-  return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
-                                         int  src_pixels_per_line,
-                                         int  xoffset,
-                                         int  yoffset,
-                                         const uint8_t *dst_ptr,
-                                         int dst_pixels_per_line,
-                                         unsigned int *sse) {
-  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 5, 8, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
-
-  return vp9_variance8x4(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
-                                             int  src_pixels_per_line,
-                                             int  xoffset,
-                                             int  yoffset,
-                                             const uint8_t *dst_ptr,
-                                             int dst_pixels_per_line,
-                                             unsigned int *sse,
-                                             const uint8_t *second_pred) {
-  uint16_t fdata3[8 * 5];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 5, 8, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8);
-  return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
-                                         int  src_pixels_per_line,
-                                         int  xoffset,
-                                         int  yoffset,
-                                         const uint8_t *dst_ptr,
-                                         int dst_pixels_per_line,
-                                         unsigned int *sse) {
-  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
-  // FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be
-  // of this big? same issue appears in all other block size settings.
-  uint8_t temp2[20 * 16];
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 9, 4, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
-
-  return vp9_variance4x8(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
-}
-
-unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
-                                             int  src_pixels_per_line,
-                                             int  xoffset,
-                                             int  yoffset,
-                                             const uint8_t *dst_ptr,
-                                             int dst_pixels_per_line,
-                                             unsigned int *sse,
-                                             const uint8_t *second_pred) {
-  uint16_t fdata3[5 * 8];  // Temp data buffer used in filtering
-  uint8_t temp2[20 * 16];
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);  // compound pred buffer
-  const int16_t *hfilter, *vfilter;
-
-  hfilter = BILINEAR_FILTERS_2TAP(xoffset);
-  vfilter = BILINEAR_FILTERS_2TAP(yoffset);
-
-  var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
-                                    1, 9, 4, hfilter);
-  var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter);
-  vp9_comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4);
-  return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
-}
-
-
 void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                        int height, const uint8_t *ref, int ref_stride) {
   int i, j;
 
   for (i = 0; i < height; i++) {
     for (j = 0; j < width; j++) {
-      int tmp;
-      tmp = pred[j] + ref[j];
-      comp_pred[j] = (tmp + 1) >> 1;
+      const int tmp = pred[j] + ref[j];
+      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
     }
     comp_pred += width;
     pred += width;
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index c9e39a1a2..4c8be71cd 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -17,14 +17,10 @@
 extern "C" {
 #endif
 
-void variance(const uint8_t *src_ptr,
-              int  source_stride,
-              const uint8_t *ref_ptr,
-              int  recon_stride,
-              int  w,
-              int  h,
-              unsigned int *sse,
-              int *sum);
+void variance(const uint8_t *a, int a_stride,
+              const uint8_t *b, int b_stride,
+              int  w, int  h,
+              unsigned int *sse, int *sum);
 
 typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr,
                                     int source_stride,