30 files changed, 731 insertions, 873 deletions
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 1aab36205..61682c42d 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -195,7 +195,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
                              int block, int mi_row, int mi_col) {
   const int *ref_sign_bias = cm->ref_frame_sign_bias;
   int i, refmv_count = 0;
-  const MODE_INFO *prev_mi = cm->prev_mi
+  const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi
         ? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]
         : NULL;
   const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index fe9cc9e6a..20de43414 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -120,7 +120,6 @@ typedef struct VP9Common {
   // frame header, 3 reset all contexts.
   int reset_frame_context;
 
-  int frame_flags;
   // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
   // MODE_INFO (8-pixel) units.
   int MBs;
@@ -284,15 +283,15 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
   xd->left_available  = (mi_col > tile->mi_col_start);
 }
 
-static INLINE MODE_INFO *get_prev_mi(VP9_COMMON *cm) {
-  const int use_prev_mi = cm->coding_use_prev_mi &&
-                          cm->width == cm->last_width &&
-                          cm->height == cm->last_height &&
-                          !cm->intra_only &&
-                          cm->last_show_frame;
+static INLINE void set_prev_mi(VP9_COMMON *cm) {
+  const int use_prev_in_find_mv_refs = cm->width == cm->last_width &&
+                                       cm->height == cm->last_height &&
+                                       !cm->intra_only &&
+                                       cm->last_show_frame;
   // Special case: set prev_mi to NULL when the previous mode info
   // context cannot be used.
-  return use_prev_mi ? &cm->prev_mip[cm->mi_stride + 1] : NULL;
+  cm->prev_mi = use_prev_in_find_mv_refs ?
+                  cm->prev_mip + cm->mi_stride + 1 : NULL;
 }
 
 static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) {
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index b45559245..8a8155410 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -13,7 +13,6 @@ struct macroblockd;
 struct macroblock;
 struct vp9_variance_vtable;
 
-#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
 struct mv;
 union int_mv;
 struct yv12_buffer_config;
@@ -758,20 +757,20 @@ specialize qw/vp9_fdct32x32_rd sse2 avx2/;
 #
 # Motion search
 #
-add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv";
+add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
 specialize qw/vp9_full_search_sad sse3 sse4_1/;
 $vp9_full_search_sad_sse3=vp9_full_search_sadx3;
 $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
 
-add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp9_refining_search_sad sse3/;
 $vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
 
-add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp9_diamond_search_sad sse3/;
 $vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
 
-add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
 specialize qw/vp9_full_range_search/;
 
 add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
diff --git a/vp9/common/x86/vp9_copy_sse2.asm b/vp9/common/x86/vp9_copy_sse2.asm
index dd522c698..b26383708 100644
--- a/vp9/common/x86/vp9_copy_sse2.asm
+++ b/vp9/common/x86/vp9_copy_sse2.asm
@@ -133,10 +133,14 @@ INIT_MMX sse
   movh                    m3, [srcq+r5q]
   lea                   srcq, [srcq+src_strideq*4]
 %ifidn %1, avg
-  pavgb                   m0, [dstq]
-  pavgb                   m1, [dstq+dst_strideq]
-  pavgb                   m2, [dstq+dst_strideq*2]
-  pavgb                   m3, [dstq+r6q]
+  movh                    m4, [dstq]
+  movh                    m5, [dstq+dst_strideq]
+  movh                    m6, [dstq+dst_strideq*2]
+  movh                    m7, [dstq+r6q]
+  pavgb                   m0, m4
+  pavgb                   m1, m5
+  pavgb                   m2, m6
+  pavgb                   m3, m7
 %endif
   movh  [dstq              ], m0
   movh  [dstq+dst_strideq  ], m1
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 9b63961f0..022a4296f 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1296,7 +1296,11 @@ int vp9_decode_frame(VP9Decoder *pbi,
   }
 
   init_macroblockd(cm, &pbi->mb);
-  cm->prev_mi = get_prev_mi(cm);
+
+  if (cm->coding_use_prev_mi)
+    set_prev_mi(cm);
+  else
+    cm->prev_mi = NULL;
 
   setup_plane_dequants(cm, xd, cm->base_qindex);
   vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index fd74478e9..56dbc99ed 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -361,7 +361,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
     // If multiple threads are used to decode tiles, then we use those threads
     // to do parallel loopfiltering.
     if (pbi->num_tile_workers) {
-      vp9_loop_filter_frame_mt(pbi, cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+      vp9_loop_filter_frame_mt(pbi, cm, cm->lf.filter_level, 0, 0);
     } else {
       vp9_loop_filter_frame(cm, &pbi->mb, cm->lf.filter_level, 0, 0);
     }
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index c9dc25191..b8250c2bb 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -49,9 +49,6 @@ typedef struct VP9Decoder {
 
   int decoded_key_frame;
 
-  int initial_width;
-  int initial_height;
-
   int do_loopfilter_inline;  // apply loopfilter to available rows immediately
   VP9Worker lf_worker;
 
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 9b124c9d9..9098063ce 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -40,13 +40,13 @@ static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) {
   const int nsync = lf_sync->sync_range;
 
   if (r && !(c & (nsync - 1))) {
-    mutex_lock(&lf_sync->mutex_[r - 1]);
+    pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
+    mutex_lock(mutex);
 
     while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
-      pthread_cond_wait(&lf_sync->cond_[r - 1],
-                        &lf_sync->mutex_[r - 1]);
+      pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
     }
-    pthread_mutex_unlock(&lf_sync->mutex_[r - 1]);
+    pthread_mutex_unlock(mutex);
   }
 #else
   (void)lf_sync;
@@ -94,21 +94,21 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
                                 VP9LfSync *const lf_sync, int num_lf_workers) {
   const int num_planes = y_only ? 1 : MAX_MB_PLANE;
   int r, c;  // SB row and col
-  LOOP_FILTER_MASK lfm;
   const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
 
   for (r = start; r < stop; r += num_lf_workers) {
     const int mi_row = r << MI_BLOCK_SIZE_LOG2;
-    MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride;
+    MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
 
     for (c = 0; c < sb_cols; ++c) {
       const int mi_col = c << MI_BLOCK_SIZE_LOG2;
+      LOOP_FILTER_MASK lfm;
       int plane;
 
       sync_read(lf_sync, r, c);
 
       vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
-      vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm);
+      vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
 
       for (plane = 0; plane < num_planes; ++plane) {
         vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
@@ -134,9 +134,9 @@ static int loop_filter_row_worker(void *arg1, void *arg2) {
 // threads.
 void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
                               VP9_COMMON *cm,
-                              MACROBLOCKD *xd,
                               int frame_filter_level,
                               int y_only, int partial_frame) {
+  VP9LfSync *const lf_sync = &pbi->lf_row_sync;
   // Number of superblock rows and cols
   const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
   const int tile_cols = 1 << cm->log2_tile_cols;
@@ -146,8 +146,6 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
   // Allocate memory used in thread synchronization.
   // This always needs to be done even if frame_filter_level is 0.
   if (!cm->current_video_frame || cm->last_height != cm->height) {
-    VP9LfSync *const lf_sync = &pbi->lf_row_sync;
-
     if (cm->last_height != cm->height) {
       const int aligned_last_height =
           ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2);
@@ -166,8 +164,7 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
   vp9_loop_filter_frame_init(cm, frame_filter_level);
 
   // Initialize cur_sb_col to -1 for all SB rows.
-  vpx_memset(pbi->lf_row_sync.cur_sb_col, -1,
-             sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);
+  vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
 
   // Set up loopfilter thread data.
   // The decoder is using num_workers instead of pbi->num_tile_workers
@@ -194,7 +191,7 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
     lf_data->stop = sb_rows;
     lf_data->y_only = y_only;   // always do all planes in decoder
 
-    lf_data->lf_sync = &pbi->lf_row_sync;
+    lf_data->lf_sync = lf_sync;
     lf_data->num_lf_workers = num_workers;
 
     // Start loopfiltering
@@ -253,8 +250,12 @@ void vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows,
 
 // Deallocate lf synchronization related mutex and data
 void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
-#if CONFIG_MULTITHREAD
+#if !CONFIG_MULTITHREAD
+  (void)rows;
+#endif  // !CONFIG_MULTITHREAD
+
   if (lf_sync != NULL) {
+#if CONFIG_MULTITHREAD
     int i;
 
     if (lf_sync->mutex_ != NULL) {
@@ -269,17 +270,10 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
       }
       vpx_free(lf_sync->cond_);
     }
-
+#endif  // CONFIG_MULTITHREAD
     vpx_free(lf_sync->cur_sb_col);
     // clear the structure as the source of this call may be a resize in which
     // case this call will be followed by an _alloc() which may fail.
-    vpx_memset(lf_sync, 0, sizeof(*lf_sync));
+    vp9_zero(*lf_sync);
   }
-#else
-  (void)rows;
-  if (lf_sync != NULL) {
-    vpx_free(lf_sync->cur_sb_col);
-    vpx_memset(lf_sync, 0, sizeof(*lf_sync));
-  }
-#endif  // CONFIG_MULTITHREAD
 }
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
index 005bd7bbd..8738ceebd 100644
--- a/vp9/decoder/vp9_dthread.h
+++ b/vp9/decoder/vp9_dthread.h
@@ -12,11 +12,9 @@
 #define VP9_DECODER_VP9_DTHREAD_H_
 
 #include "./vpx_config.h"
-#include "vp9/common/vp9_loopfilter.h"
 #include "vp9/decoder/vp9_reader.h"
 #include "vp9/decoder/vp9_thread.h"
 
-struct macroblockd;
 struct VP9Common;
 struct VP9Decoder;
 
@@ -43,16 +41,15 @@ typedef struct VP9LfSyncData {
 } VP9LfSync;
 
 // Allocate memory for loopfilter row synchronization.
-void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync,
+void vp9_loop_filter_alloc(struct VP9Common *cm, VP9LfSync *lf_sync,
                            int rows, int width);
 
 // Deallocate loopfilter synchronization related mutex and data.
-void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows);
+void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows);
 
 // Multi-threaded loopfilter that uses the tile threads.
 void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi,
                               struct VP9Common *cm,
-                              struct macroblockd *xd,
                               int frame_filter_level,
                               int y_only, int partial_frame);
 
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 787909142..e55881ffc 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -200,6 +200,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
 
     // Rate target ratio to set q delta.
     const float rate_ratio_qdelta = 2.0;
+    const double q = vp9_convert_qindex_to_q(cm->base_qindex);
     vp9_clear_system_state();
     // Some of these parameters may be set via codec-control function later.
     cr->max_sbs_perframe = 10;
@@ -209,14 +210,12 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
     // Set rate threshold to some fraction of target (and scaled by 256).
     cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2;
     // Distortion threshold, quadratic in Q, scale factor to be adjusted.
-    cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
-        vp9_convert_qindex_to_q(cm->base_qindex));
+    cr->thresh_dist_sb = 8 * (int)(q * q);
     if (cpi->sf.use_nonrd_pick_mode) {
       // May want to be more conservative with thresholds in non-rd mode for now
       // as rate/distortion are derived from model based on prediction residual.
       cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3;
-      cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
-          vp9_convert_qindex_to_q(cm->base_qindex));
+      cr->thresh_dist_sb = 4 * (int)(q * q);
     }
 
     cr->num_seg_blocks = 0;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 61a5022ec..70b70fec2 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -103,34 +103,31 @@ static const uint8_t VP9_VAR_OFFS[64] = {
 };
 
 static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
-                                              MACROBLOCK *x,
+                                              const struct buf_2d *ref,
                                               BLOCK_SIZE bs) {
-  unsigned int var, sse;
-  var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
-                           VP9_VAR_OFFS, 0, &sse);
+  unsigned int sse;
+  const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
+                                              VP9_VAR_OFFS, 0, &sse);
   return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
 }
 
 static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
-                                                   MACROBLOCK *x,
-                                                   int mi_row,
-                                                   int mi_col,
+                                                   const struct buf_2d *ref,
+                                                   int mi_row, int mi_col,
                                                    BLOCK_SIZE bs) {
-  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
-  int offset = (mi_row * MI_SIZE) * yv12->y_stride + (mi_col * MI_SIZE);
-  unsigned int var, sse;
-  var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
-                           x->plane[0].src.stride,
-                           yv12->y_buffer + offset,
-                           yv12->y_stride,
-                           &sse);
+  const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
+  const uint8_t* last_y = &last->y_buffer[mi_row * MI_SIZE * last->y_stride +
+                                              mi_col * MI_SIZE];
+  unsigned int sse;
+  const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
+                                              last_y, last->y_stride, &sse);
   return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
 }
 
 static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi,
                                                    int mi_row,
                                                    int mi_col) {
-  unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb,
+  unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src,
                                                     mi_row, mi_col,
                                                     BLOCK_64X64);
   if (var < 8)
@@ -146,7 +143,7 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi,
 static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi,
                                                       int mi_row,
                                                       int mi_col) {
-  unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb,
+  unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src,
                                                     mi_row, mi_col,
                                                     BLOCK_64X64);
   if (var < 4)
@@ -246,8 +243,8 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
   vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
 
   // R/D setup.
-  x->rddiv = cpi->RDDIV;
-  x->rdmult = cpi->RDMULT;
+  x->rddiv = cpi->rd.RDDIV;
+  x->rdmult = cpi->rd.RDMULT;
 
   // Setup segment ID.
   if (seg->enabled) {
@@ -822,6 +819,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
                          int output_enabled) {
   int i, x_idx, y;
   VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd_opt = &cpi->rd;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   struct macroblock_plane *const p = x->plane;
@@ -907,7 +905,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
 
   if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
     for (i = 0; i < TX_MODES; i++)
-      cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i];
+      rd_opt->tx_select_diff[i] += ctx->tx_rd_diff[i];
   }
 
 #if CONFIG_INTERNAL_STATS
@@ -940,12 +938,12 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
       }
     }
 
-    cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
-    cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
-    cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
+    rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
+    rd_opt->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
+    rd_opt->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
 
     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-      cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
+      rd_opt->filter_diff[i] += ctx->best_filter_diff[i];
   }
 }
 
@@ -1013,12 +1011,16 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
   // Set to zero to make sure we do not use the previous encoded frame stats
   mbmi->skip = 0;
 
-  x->source_variance = get_sby_perpixel_variance(cpi, x, bsize);
+  x->source_variance = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+
+  // Save rdmult before it might be changed, so it can be restored later.
+  orig_rdmult = x->rdmult;
+  if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
+    activity_masking(cpi, x);
 
   if (aq_mode == VARIANCE_AQ) {
     const int energy = bsize <= BLOCK_16X16 ? x->mb_energy
                                             : vp9_block_energy(cpi, x, bsize);
-
     if (cm->frame_type == KEY_FRAME ||
         cpi->refresh_alt_ref_frame ||
         (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
@@ -1031,14 +1033,6 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
 
     rdmult_ratio = vp9_vaq_rdmult_ratio(energy);
     vp9_init_plane_quantizers(cpi, x);
-  }
-
-  // Save rdmult before it might be changed, so it can be restored later.
-  orig_rdmult = x->rdmult;
-  if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
-    activity_masking(cpi, x);
-
-  if (aq_mode == VARIANCE_AQ) {
     vp9_clear_system_state();
     x->rdmult = (int)round(x->rdmult * rdmult_ratio);
   } else if (aq_mode == COMPLEXITY_AQ) {
@@ -1070,14 +1064,11 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
                                     totaldist, bsize, ctx, best_rd);
   }
 
-  if (aq_mode == VARIANCE_AQ) {
-    x->rdmult = orig_rdmult;
-    if (*totalrate != INT_MAX) {
-      vp9_clear_system_state();
-      *totalrate = (int)round(*totalrate * rdmult_ratio);
-    }
-  } else if (aq_mode == COMPLEXITY_AQ || aq_mode == CYCLIC_REFRESH_AQ) {
-    x->rdmult = orig_rdmult;
+  x->rdmult = orig_rdmult;
+
+  if (aq_mode == VARIANCE_AQ && *totalrate != INT_MAX) {
+    vp9_clear_system_state();
+    *totalrate = (int)round(*totalrate * rdmult_ratio);
   }
 }
 
@@ -1364,6 +1355,25 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
   }
 }
 
+// Walks an 8x8 grid of prev_mi_8x8 slots. Each non-NULL entry is mapped to
+// the same offset from cm->mi in mi_8x8, and its sb_type is carried over.
+static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
+                              MODE_INFO **prev_mi_8x8) {
+  const int stride = cm->mi_stride;
+  int r, c;
+  for (r = 0; r < 8; ++r) {
+    for (c = 0; c < 8; ++c) {
+      const int idx = r * stride + c;
+      MODE_INFO *const prev = prev_mi_8x8[idx];
+      if (prev) {
+        const BLOCK_SIZE sb_type = prev->mbmi.sb_type;
+        mi_8x8[idx] = cm->mi + (prev - cm->prev_mi);
+        mi_8x8[idx]->mbmi.sb_type = sb_type;
+      }
+    }
+  }
+}
+
 static void constrain_copy_partitioning(VP9_COMP *const cpi,
                                         const TileInfo *const tile,
                                         MODE_INFO **mi_8x8,
@@ -1413,38 +1423,10 @@ static void constrain_copy_partitioning(VP9_COMP *const cpi,
     }
   } else {
     // Else this is a partial SB64, copy previous partition.
-    for (block_row = 0; block_row < 8; ++block_row) {
-      for (block_col = 0; block_col < 8; ++block_col) {
-        MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col];
-        const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
-        if (prev_mi) {
-          const ptrdiff_t offset = prev_mi - cm->prev_mi;
-          mi_8x8[block_row * mis + block_col] = cm->mi + offset;
-          mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
-        }
-      }
-    }
+    copy_partitioning(cm, mi_8x8, prev_mi_8x8);
   }
 }
 
-static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
-                              MODE_INFO **prev_mi_8x8) {
-  const int mis = cm->mi_stride;
-  int block_row, block_col;
-
-  for (block_row = 0; block_row < 8; ++block_row) {
-    for (block_col = 0; block_col < 8; ++block_col) {
-      MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col];
-      const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
-
-      if (prev_mi) {
-        const ptrdiff_t offset = prev_mi - cm->prev_mi;
-        mi_8x8[block_row * mis + block_col] = cm->mi + offset;
-        mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
-      }
-    }
-  }
-}
 
 const struct {
   int row;
@@ -1465,13 +1447,14 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
                                            MODE_INFO **mi_8x8,
                                            int mi_row, int mi_col) {
   VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *x = &cpi->mb;
+  MACROBLOCK *const x = &cpi->mb;
   const int mis = cm->mi_stride;
-  int row8x8_remaining = tile->mi_row_end - mi_row;
-  int col8x8_remaining = tile->mi_col_end - mi_col;
-  int r, c;
+  const int row8x8_remaining = tile->mi_row_end - mi_row;
+  const int col8x8_remaining = tile->mi_col_end - mi_col;
   MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
 
+  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+
   assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
 
   // In-image SB64
@@ -1483,8 +1466,8 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
     const int pre_offset = (mi_row * MI_SIZE) * pre_stride +
                            (mi_col * MI_SIZE);
     const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset;
-    const int thr_32x32 = cpi->sf.source_var_thresh;
-    const int thr_64x64 = thr_32x32 << 1;
+    const unsigned int thr_32x32 = cpi->sf.source_var_thresh;
+    const unsigned int thr_64x64 = thr_32x32 << 1;
     int i, j;
     int index;
     diff d32[4];
@@ -1550,16 +1533,13 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
     BLOCK_SIZE bsize = BLOCK_16X16;
     int bh = num_8x8_blocks_high_lookup[bsize];
     int bw = num_8x8_blocks_wide_lookup[bsize];
-
+    int r, c;
     for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
       for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
-        int index = r * mis + c;
-        // Find a partition size that fits
-        bsize = find_partition_size(bsize,
-                                    (row8x8_remaining - r),
-                                    (col8x8_remaining - c), &bh, &bw);
+        const int index = r * mis + c;
         mi_8x8[index] = mi_upper_left + index;
-        mi_8x8[index]->mbmi.sb_type = bsize;
+        mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize,
+            row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
       }
     }
   }
@@ -2234,7 +2214,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
   if (cpi->sf.disable_split_var_thresh && partition_none_allowed) {
     unsigned int source_variancey;
     vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
-    source_variancey = get_sby_perpixel_variance(cpi, x, bsize);
+    source_variancey = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
     if (source_variancey < cpi->sf.disable_split_var_thresh) {
       do_split = 0;
       if (source_variancey < cpi->sf.disable_split_var_thresh / 2)
@@ -2668,9 +2648,10 @@ static TX_MODE select_tx_mode(const VP9_COMP *cpi) {
     if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
       return ALLOW_32X32;
     } else if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
+      const RD_OPT *const rd_opt = &cpi->rd;
       const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
-      return cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >
-                 cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
+      return rd_opt->tx_select_threshes[frame_type][ALLOW_32X32] >
+                 rd_opt->tx_select_threshes[frame_type][TX_MODE_SELECT] ?
                      ALLOW_32X32 : TX_MODE_SELECT;
     } else {
       unsigned int total = 0;
@@ -3201,7 +3182,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                             1, &dummy_rate, &dummy_dist);
         break;
       case SOURCE_VAR_BASED_PARTITION:
-        set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
         set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
         nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                             1, &dummy_rate, &dummy_dist);
@@ -3232,8 +3212,23 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
 }
 // end RTC play code
 
+// Returns 1 when the frame is shown, is not a key frame, and the summed
+// intra count shifted left by two is below the summed inter count; else 0.
+static int get_skip_encode_frame(const VP9_COMMON *cm) {
+  unsigned int num_intra = 0, num_inter = 0;
+  int ctx;
+  for (ctx = 0; ctx < INTRA_INTER_CONTEXTS; ++ctx) {
+    num_intra += cm->counts.intra_inter[ctx][0];
+    num_inter += cm->counts.intra_inter[ctx][1];
+  }
+  if (cm->frame_type == KEY_FRAME || !cm->show_frame)
+    return 0;
+  return (num_intra << 2) < num_inter;
+}
+
 static void encode_frame_internal(VP9_COMP *cpi) {
   SPEED_FEATURES *const sf = &cpi->sf;
+  RD_OPT *const rd_opt = &cpi->rd;
   MACROBLOCK *const x = &cpi->mb;
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -3244,10 +3239,10 @@ static void encode_frame_internal(VP9_COMP *cpi) {
   vp9_zero(cm->counts);
   vp9_zero(cpi->coef_counts);
   vp9_zero(cpi->tx_stepdown_count);
-  vp9_zero(cpi->rd_comp_pred_diff);
-  vp9_zero(cpi->rd_filter_diff);
-  vp9_zero(cpi->rd_tx_select_diff);
-  vp9_zero(cpi->rd_tx_select_threshes);
+  vp9_zero(rd_opt->comp_pred_diff);
+  vp9_zero(rd_opt->filter_diff);
+  vp9_zero(rd_opt->tx_select_diff);
+  vp9_zero(rd_opt->tx_select_threshes);
 
   cm->tx_mode = select_tx_mode(cpi);
 
@@ -3266,7 +3261,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
     build_activity_map(cpi);
 
-  cm->prev_mi = get_prev_mi(cm);
+  set_prev_mi(cm);
 
   if (sf->use_nonrd_pick_mode) {
     // Initialize internal buffer pointers for rtc coding, where non-RD
@@ -3284,9 +3279,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
     }
     vp9_zero(x->zcoeff_blk);
 
-    if (cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION &&
+    if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION &&
         cm->current_video_frame > 0) {
-      int check_freq = cpi->sf.search_type_check_frequency;
+      int check_freq = sf->search_type_check_frequency;
 
       if ((cm->current_video_frame - 1) % check_freq == 0) {
         cpi->use_large_partition_rate = 0;
@@ -3303,7 +3298,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
 
       if ((cm->current_video_frame - 1) % check_freq >= 1) {
         if (cpi->use_large_partition_rate < 15)
-          cpi->sf.partition_search_type = FIXED_PARTITION;
+          sf->partition_search_type = FIXED_PARTITION;
       }
     }
   }
@@ -3344,19 +3339,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
     cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
   }
 
-  if (sf->skip_encode_sb) {
-    int j;
-    unsigned int intra_count = 0, inter_count = 0;
-    for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
-      intra_count += cm->counts.intra_inter[j][0];
-      inter_count += cm->counts.intra_inter[j][1];
-    }
-    sf->skip_encode_frame = (intra_count << 2) < inter_count &&
-                            cm->frame_type != KEY_FRAME &&
-                            cm->show_frame;
-  } else {
-    sf->skip_encode_frame = 0;
-  }
+  sf->skip_encode_frame = sf->skip_encode_sb ? get_skip_encode_frame(cm) : 0;
 
 #if 0
   // Keep record of the total distortion this time around for future use
@@ -3366,6 +3349,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
 
 void vp9_encode_frame(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd_opt = &cpi->rd;
 
   // In the longer term the encoder should be generalized to match the
   // decoder such that we allow compound where one of the 3 buffers has a
@@ -3398,8 +3382,8 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     // that for subsequent frames.
     // It does the same analysis for transform size selection also.
     const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
-    const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type];
-    const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type];
+    const int64_t *mode_thresh = rd_opt->prediction_type_threshes[frame_type];
+    const int64_t *filter_thresh = rd_opt->filter_threshes[frame_type];
 
     /* prediction (compound, single or hybrid) mode selection */
     if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter)
@@ -3432,25 +3416,25 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     encode_frame_internal(cpi);
 
     for (i = 0; i < REFERENCE_MODES; ++i) {
-      const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
-      cpi->rd_prediction_type_threshes[frame_type][i] += diff;
-      cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
+      const int diff = (int) (rd_opt->comp_pred_diff[i] / cm->MBs);
+      rd_opt->prediction_type_threshes[frame_type][i] += diff;
+      rd_opt->prediction_type_threshes[frame_type][i] >>= 1;
     }
 
     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
-      const int64_t diff = cpi->rd_filter_diff[i] / cm->MBs;
-      cpi->rd_filter_threshes[frame_type][i] =
-          (cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
+      const int64_t diff = rd_opt->filter_diff[i] / cm->MBs;
+      rd_opt->filter_threshes[frame_type][i] =
+          (rd_opt->filter_threshes[frame_type][i] + diff) / 2;
     }
 
     for (i = 0; i < TX_MODES; ++i) {
-      int64_t pd = cpi->rd_tx_select_diff[i];
+      int64_t pd = rd_opt->tx_select_diff[i];
       int diff;
       if (i == TX_MODE_SELECT)
         pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZES - 1), 0);
       diff = (int) (pd / cm->MBs);
-      cpi->rd_tx_select_threshes[frame_type][i] += diff;
-      cpi->rd_tx_select_threshes[frame_type][i] /= 2;
+      rd_opt->tx_select_threshes[frame_type][i] += diff;
+      rd_opt->tx_select_threshes[frame_type][i] /= 2;
     }
 
     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 5e98e4e3f..b0c014eef 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -63,24 +63,17 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
 }
 
 #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
-typedef struct vp9_token_state vp9_token_state;
 
-struct vp9_token_state {
+typedef struct vp9_token_state {
   int           rate;
   int           error;
   int           next;
   signed char   token;
   short         qc;
-};
+} vp9_token_state;
 
 // TODO(jimbankoski): experiment to find optimal RD numbers.
-#define Y1_RD_MULT 4
-#define UV_RD_MULT 2
-
-static const int plane_rd_mult[4] = {
-  Y1_RD_MULT,
-  UV_RD_MULT,
-};
+static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 };
 
 #define UPDATE_RD_COST()\
 {\
@@ -105,60 +98,56 @@ static int trellis_get_coeff_context(const int16_t *scan,
   return pt;
 }
 
-static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
-                       TX_SIZE tx_size, MACROBLOCK *mb,
-                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
+static int optimize_b(MACROBLOCK *mb, int plane, int block,
+                      BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx) {
   MACROBLOCKD *const xd = &mb->e_mbd;
-  struct macroblock_plane *p = &mb->plane[plane];
-  struct macroblockd_plane *pd = &xd->plane[plane];
+  struct macroblock_plane *const p = &mb->plane[plane];
+  struct macroblockd_plane *const pd = &xd->plane[plane];
   const int ref = is_inter_block(&xd->mi[0]->mbmi);
   vp9_token_state tokens[1025][2];
   unsigned best_index[1025][2];
-  const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
-  int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
-  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
-  int eob = p->eobs[block], final_eob, sz = 0;
-  const int i0 = 0;
-  int rc, x, next, i;
-  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
-  int rate0, rate1, error0, error1, t0, t1;
-  int best, band, pt;
-  PLANE_TYPE type = pd->plane_type;
-  int err_mult = plane_rd_mult[type];
+  uint8_t token_cache[1024];
+  const int16_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
+  int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+  const int eob = p->eobs[block];
+  const PLANE_TYPE type = pd->plane_type;
   const int default_eob = 16 << (tx_size << 1);
   const int mul = 1 + (tx_size == TX_32X32);
-  uint8_t token_cache[1024];
   const int16_t *dequant_ptr = pd->dequant;
   const uint8_t *const band_translate = get_band_translate(tx_size);
-  const scan_order *so = get_scan(xd, tx_size, type, block);
-  const int16_t *scan = so->scan;
-  const int16_t *nb = so->neighbors;
+  const scan_order *const so = get_scan(xd, tx_size, type, block);
+  const int16_t *const scan = so->scan;
+  const int16_t *const nb = so->neighbors;
+  int next = eob, sz = 0;
+  int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv;
+  int64_t rd_cost0, rd_cost1;
+  int rate0, rate1, error0, error1, t0, t1;
+  int best, band, pt, i, final_eob;
 
   assert((!type && !plane) || (type && plane));
   assert(eob <= default_eob);
 
   /* Now set up a Viterbi trellis to evaluate alternative roundings. */
-  rdmult = mb->rdmult * err_mult;
-  if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi))
+  if (!ref)
     rdmult = (rdmult * 9) >> 4;
-  rddiv = mb->rddiv;
+
   /* Initialize the sentinel node of the trellis. */
   tokens[eob][0].rate = 0;
   tokens[eob][0].error = 0;
   tokens[eob][0].next = default_eob;
   tokens[eob][0].token = EOB_TOKEN;
   tokens[eob][0].qc = 0;
-  *(tokens[eob] + 1) = *(tokens[eob] + 0);
-  next = eob;
+  tokens[eob][1] = tokens[eob][0];
+
   for (i = 0; i < eob; i++)
-    token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
-        qcoeff[scan[i]]].token];
+    token_cache[scan[i]] =
+        vp9_pt_energy_class[vp9_dct_value_tokens_ptr[qcoeff[scan[i]]].token];
 
-  for (i = eob; i-- > i0;) {
+  for (i = eob; i-- > 0;) {
     int base_bits, d2, dx;
-
-    rc = scan[i];
-    x = qcoeff[rc];
+    const int rc = scan[i];
+    int x = qcoeff[rc];
     /* Only add a trellis state for non-zero coefficients. */
     if (x) {
       int shortcut = 0;
@@ -172,17 +161,15 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
       if (next < default_eob) {
         band = band_translate[i + 1];
         pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
-        rate0 +=
-          mb->token_costs[tx_size][type][ref][band][0][pt]
-                         [tokens[next][0].token];
-        rate1 +=
-          mb->token_costs[tx_size][type][ref][band][0][pt]
-                         [tokens[next][1].token];
+        rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
+                                [tokens[next][0].token];
+        rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
+                                [tokens[next][1].token];
       }
       UPDATE_RD_COST();
       /* And pick the best. */
       best = rd_cost1 < rd_cost0;
-      base_bits = *(vp9_dct_value_cost_ptr + x);
+      base_bits = vp9_dct_value_cost_ptr[x];
       dx = mul * (dqcoeff[rc] - coeff[rc]);
       d2 = dx * dx;
       tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
@@ -196,9 +183,9 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
       rate0 = tokens[next][0].rate;
       rate1 = tokens[next][1].rate;
 
-      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
-          (abs(x)*dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
-                                         dequant_ptr[rc != 0]))
+      if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
+          (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
+                                               dequant_ptr[rc != 0]))
         shortcut = 1;
       else
         shortcut = 0;
@@ -235,7 +222,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
       UPDATE_RD_COST();
       /* And pick the best. */
       best = rd_cost1 < rd_cost0;
-      base_bits = *(vp9_dct_value_cost_ptr + x);
+      base_bits = vp9_dct_value_cost_ptr[x];
 
       if (shortcut) {
         dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
@@ -274,26 +261,26 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
 
   /* Now pick the best path through the whole trellis. */
   band = band_translate[i + 1];
-  pt = combine_entropy_contexts(*a, *l);
   rate0 = tokens[next][0].rate;
   rate1 = tokens[next][1].rate;
   error0 = tokens[next][0].error;
   error1 = tokens[next][1].error;
   t0 = tokens[next][0].token;
   t1 = tokens[next][1].token;
-  rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
-  rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
+  rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
+  rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
   UPDATE_RD_COST();
   best = rd_cost1 < rd_cost0;
-  final_eob = i0 - 1;
+  final_eob = -1;
   vpx_memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
   vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
   for (i = next; i < eob; i = next) {
-    x = tokens[i][best].qc;
+    const int x = tokens[i][best].qc;
+    const int rc = scan[i];
     if (x) {
       final_eob = i;
     }
-    rc = scan[i];
+
     qcoeff[rc] = x;
     dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
 
@@ -303,7 +290,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
   final_eob++;
 
   mb->plane[plane].eobs[block] = final_eob;
-  *a = *l = (final_eob > 0);
+  return final_eob;
 }
 
 static INLINE void fdct32x32(int rd_transform,
@@ -393,7 +380,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
     vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 
   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
-    optimize_b(plane, block, plane_bsize, tx_size, x, a, l);
+    const int ctx = combine_entropy_contexts(*a, *l);
+    *a = *l = optimize_b(x, plane, block, plane_bsize, tx_size, ctx) > 0;
   } else {
     *a = *l = p->eobs[block] > 0;
   }
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c51f43fa8..874767de7 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -376,15 +376,12 @@ static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
   }
 }
 
-static unsigned int zz_motion_search(const MACROBLOCK *x) {
-  const MACROBLOCKD *const xd = &x->e_mbd;
-  const uint8_t *const src = x->plane[0].src.buf;
-  const int src_stride = x->plane[0].src.stride;
-  const uint8_t *const ref = xd->plane[0].pre[0].buf;
-  const int ref_stride = xd->plane[0].pre[0].stride;
+static unsigned int get_prediction_error(BLOCK_SIZE bsize,
+                                         const struct buf_2d *src,
+                                         const struct buf_2d *ref) {
   unsigned int sse;
-  vp9_variance_fn_t fn = get_block_variance_fn(xd->mi[0]->mbmi.sb_type);
-  fn(src, src_stride, ref, ref_stride, &sse);
+  const vp9_variance_fn_t fn = get_block_variance_fn(bsize);
+  fn(src->buf, src->stride, ref->buf, ref->stride, &sse);
   return sse;
 }
 
@@ -416,9 +413,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   // Center the initial step/diamond search on best mv.
   tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
                                     step_param,
-                                    x->sadperbit16, &num00, &v_fn_ptr,
-                                    x->nmvjointcost,
-                                    x->mvcost, ref_mv);
+                                    x->sadperbit16, &num00, &v_fn_ptr, ref_mv);
   if (tmp_err < INT_MAX)
     tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
   if (tmp_err < INT_MAX - new_mv_mode_penalty)
@@ -442,9 +437,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
     } else {
       tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
                                         step_param + n, x->sadperbit16,
-                                        &num00, &v_fn_ptr,
-                                        x->nmvjointcost,
-                                        x->mvcost, ref_mv);
+                                        &num00, &v_fn_ptr, ref_mv);
       if (tmp_err < INT_MAX)
         tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
       if (tmp_err < INT_MAX - new_mv_mode_penalty)
@@ -632,7 +625,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
         int_mv mv, tmp_mv;
 
         xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
-        motion_error = zz_motion_search(x);
+        motion_error = get_prediction_error(bsize, &x->plane[0].src,
+                                            &xd->plane[0].pre[0]);
         // Assume 0,0 motion with no mv overhead.
         mv.as_int = tmp_mv.as_int = 0;
 
@@ -668,7 +662,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
           int gf_motion_error;
 
           xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
-          gf_motion_error = zz_motion_search(x);
+          gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+                                                 &xd->plane[0].pre[0]);
 
           first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
                                    &gf_motion_error);
@@ -980,8 +975,6 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
                                    oxcf->target_bandwidth / 10000000.0);
   }
 
-  cpi->output_framerate = oxcf->framerate;
-
   // Calculate a minimum intra value to be used in determining the IIratio
   // scores used in the second pass. We have this minimum to make sure
   // that clips that are static but "low complexity" in the intra domain
@@ -2186,7 +2179,7 @@ void vp9_rc_get_first_pass_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   if (!cpi->refresh_alt_ref_frame &&
       (cm->current_video_frame == 0 ||
-       (cm->frame_flags & FRAMEFLAGS_KEY))) {
+       (cpi->frame_flags & FRAMEFLAGS_KEY))) {
     cm->frame_type = KEY_FRAME;
   } else {
     cm->frame_type = INTER_FRAME;
@@ -2256,7 +2249,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
 
   // Keyframe and section processing.
   if (rc->frames_to_key == 0 ||
-      (cm->frame_flags & FRAMEFLAGS_KEY)) {
+      (cpi->frame_flags & FRAMEFLAGS_KEY)) {
     // Define next KF group and assign bits to it.
     this_frame_copy = this_frame;
     find_next_key_frame(cpi, &this_frame_copy);
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 44b171fd1..a9da7283a 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -77,12 +77,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
           INT_MAX);
 }
 
-static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv,
+static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
                                   int_mv *dst_mv, int mb_row, int mb_col) {
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
   unsigned int err, tmp_err;
-  int_mv tmp_mv;
+  MV tmp_mv;
 
   // Try zero MV first
   // FIXME should really use something like near/nearest MV and/or MV prediction
@@ -93,24 +93,22 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv,
 
   // Test last reference frame using the previous best mv as the
   // starting point (best reference) for the search
-  tmp_err = do_16x16_motion_iteration(cpi, &ref_mv->as_mv, &tmp_mv.as_mv,
-                                      mb_row, mb_col);
+  tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
   if (tmp_err < err) {
     err = tmp_err;
-    dst_mv->as_int = tmp_mv.as_int;
+    dst_mv->as_mv = tmp_mv;
   }
 
   // If the current best reference mv is not centered on 0,0 then do a 0,0
   // based search as well.
-  if (ref_mv->as_int) {
+  if (ref_mv->row != 0 || ref_mv->col != 0) {
     unsigned int tmp_err;
-    int_mv zero_ref_mv, tmp_mv;
+    MV zero_ref_mv = {0, 0}, tmp_mv;
 
-    zero_ref_mv.as_int = 0;
-    tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv.as_mv, &tmp_mv.as_mv,
+    tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv,
                                         mb_row, mb_col);
     if (tmp_err < err) {
-      dst_mv->as_int = tmp_mv.as_int;
+      dst_mv->as_mv = tmp_mv;
       err = tmp_err;
     }
   }
@@ -173,7 +171,7 @@ static void update_mbgraph_mb_stats
   YV12_BUFFER_CONFIG *buf,
   int mb_y_offset,
   YV12_BUFFER_CONFIG *golden_ref,
-  int_mv *prev_golden_ref_mv,
+  const MV *prev_golden_ref_mv,
   YV12_BUFFER_CONFIG *alt_ref,
   int mb_row,
   int mb_col
@@ -239,13 +237,11 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
 
   int mb_col, mb_row, offset = 0;
   int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
-  int_mv arf_top_mv, gld_top_mv;
+  MV arf_top_mv = {0, 0}, gld_top_mv = {0, 0};
   MODE_INFO mi_local = { { 0 } };
 
   // Set up limit values for motion vectors to prevent them extending outside
   // the UMV borders.
-  arf_top_mv.as_int = 0;
-  gld_top_mv.as_int = 0;
   x->mv_row_min     = -BORDER_MV_PIXELS_B16;
   x->mv_row_max     = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
   xd->up_available  = 0;
@@ -258,15 +254,13 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
   mi_local.mbmi.ref_frame[1] = NONE;
 
   for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
-    int_mv arf_left_mv, gld_left_mv;
+    MV arf_left_mv = arf_top_mv, gld_left_mv = gld_top_mv;
     int mb_y_in_offset  = mb_y_offset;
     int arf_y_in_offset = arf_y_offset;
     int gld_y_in_offset = gld_y_offset;
 
     // Set up limit values for motion vectors to prevent them extending outside
     // the UMV borders.
-    arf_left_mv.as_int = arf_top_mv.as_int;
-    gld_left_mv.as_int = gld_top_mv.as_int;
     x->mv_col_min      = -BORDER_MV_PIXELS_B16;
     x->mv_col_max      = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
     xd->left_available = 0;
@@ -277,11 +271,11 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
       update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
                               golden_ref, &gld_left_mv, alt_ref,
                               mb_row, mb_col);
-      arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int;
-      gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int;
+      arf_left_mv = mb_stats->ref[ALTREF_FRAME].m.mv.as_mv;
+      gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
       if (mb_col == 0) {
-        arf_top_mv.as_int = arf_left_mv.as_int;
-        gld_top_mv.as_int = gld_left_mv.as_int;
+        arf_top_mv = arf_left_mv;
+        gld_top_mv = gld_left_mv;
       }
       xd->left_available = 1;
       mb_y_in_offset    += 16;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index f7a02a4a7..8a7901172 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -66,7 +66,7 @@ int vp9_init_search_range(VP9_COMP *cpi, int size) {
 }
 
 static INLINE int mv_cost(const MV *mv,
-                          const int *joint_cost, int *comp_cost[2]) {
+                          const int *joint_cost, int *const comp_cost[2]) {
   return joint_cost[vp9_get_mv_joint(mv)] +
              comp_cost[0][mv->row] + comp_cost[1][mv->col];
 }
@@ -90,14 +90,13 @@ static int mv_err_cost(const MV *mv, const MV *ref,
   return 0;
 }
 
-static int mvsad_err_cost(const MV *mv, const MV *ref,
-                          const int *mvjsadcost, int *mvsadcost[2],
+static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
                           int error_per_bit) {
-  if (mvsadcost) {
+  if (x->nmvsadcost) {
     const MV diff = { mv->row - ref->row,
                       mv->col - ref->col };
-    return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
-                                  error_per_bit, 8);
+    return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
+                                      x->nmvsadcost) * error_per_bit, 8);
   }
   return 0;
 }
@@ -170,14 +169,13 @@ static INLINE int sp(int x) {
   return (x & 7) << 1;
 }
 
-static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c,
-                                 int offset) {
-  return &buf[(r >> 3) * stride + (c >> 3) - offset];
+static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
+  return &buf[(r >> 3) * stride + (c >> 3)];
 }
 
 /* returns subpixel variance error function */
 #define DIST(r, c) \
-    vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), z, \
+    vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
              src_stride, &sse)
 
 /* checks if (r, c) has better score than previous best */
@@ -270,7 +268,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                  int *mvjcost, int *mvcost[2],
                                  int *distortion,
                                  unsigned int *sse1) {
-  const uint8_t *z = x->plane[0].src.buf;
+  const uint8_t *const z = x->plane[0].src.buf;
   const int src_stride = x->plane[0].src.stride;
   const MACROBLOCKD *xd = &x->e_mbd;
   unsigned int besterr = INT_MAX;
@@ -283,7 +281,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
 
   const int y_stride = xd->plane[0].pre[0].stride;
   const int offset = bestmv->row * y_stride + bestmv->col;
-  const uint8_t *y = xd->plane[0].pre[0].buf + offset;
+  const uint8_t *const y = xd->plane[0].pre[0].buf;
 
   int rr = ref_mv->row;
   int rc = ref_mv->col;
@@ -303,7 +301,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
   bestmv->col *= 8;
 
   // calculate central point error
-  besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
+  besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
   *distortion = besterr;
   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
 
@@ -353,7 +351,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
 #undef DIST
 /* returns subpixel variance error function */
 #define DIST(r, c) \
-    vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), \
+    vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
               z, src_stride, &sse, second_pred)
 
 int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
@@ -368,7 +366,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
                                       unsigned int *sse1,
                                       const uint8_t *second_pred,
                                       int w, int h) {
-  const uint8_t *z = x->plane[0].src.buf;
+  const uint8_t *const z = x->plane[0].src.buf;
   const int src_stride = x->plane[0].src.stride;
   const MACROBLOCKD *xd = &x->e_mbd;
   unsigned int besterr = INT_MAX;
@@ -382,7 +380,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
   DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
   const int y_stride = xd->plane[0].pre[0].stride;
   const int offset = bestmv->row * y_stride + bestmv->col;
-  const uint8_t *y = xd->plane[0].pre[0].buf + offset;
+  const uint8_t *const y = xd->plane[0].pre[0].buf;
 
   int rr = ref_mv->row;
   int rc = ref_mv->col;
@@ -404,7 +402,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
   // calculate central point error
   // TODO(yunqingwang): central pointer error was already calculated in full-
   // pixel search, and can be passed in this function.
-  vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+  vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
   besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
   *distortion = besterr;
   besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -479,8 +477,7 @@ static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
   {\
     if (thissad < bestsad) {\
       if (use_mvcost) \
-        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \
-                                  mvjsadcost, mvsadcost, sad_per_bit);\
+        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\
       if (thissad < bestsad) {\
         bestsad = thissad;\
         best_site = i;\
@@ -520,9 +517,6 @@ static int vp9_pattern_search(const MACROBLOCK *x,
   int k = -1;
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   int best_init_s = search_param_to_steps[search_param];
-  const int *const mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   // adjust ref_mv to make sure it is within MV range
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   br = ref_mv->row;
@@ -531,8 +525,8 @@ static int vp9_pattern_search(const MACROBLOCK *x,
   // Work out the start point for the search
   bestsad = vfp->sdf(what->buf, what->stride,
                      get_buf_from_mv(in_what, ref_mv), in_what->stride,
-                     0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv,
-                         mvjsadcost, mvsadcost, sad_per_bit);
+                     0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv,
+                                                  sad_per_bit);
 
   // Search all possible scales upto the search param around the center point
   // pick the scale of the point that is best as the starting scale of
@@ -880,7 +874,6 @@ int vp9_fast_dia_search(const MACROBLOCK *x,
 int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
                             int search_param, int sad_per_bit, int *num00,
                             const vp9_variance_fn_ptr_t *fn_ptr,
-                            int *mvjcost, int *mvcost[2],
                             const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *what = x->plane[0].src.buf;
@@ -893,10 +886,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
 
   unsigned int thissad;
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   int tr, tc;
   int best_tr = 0;
   int best_tc = 0;
@@ -918,8 +907,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
 
   // Check the starting position
   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv,
-                                 mvjsadcost, mvsadcost, sad_per_bit);
+                + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   start_row = MAX(-range, x->mv_row_min - ref_row);
   start_col = MAX(-range, x->mv_col_min - ref_col);
@@ -940,8 +928,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
           if (sad_array[i] < bestsad) {
             const MV this_mv = {ref_row + tr, ref_col + tc + i};
             thissad = sad_array[i] +
-                      mvsad_err_cost(&this_mv, &fcenter_mv,
-                                      mvjsadcost, mvsadcost, sad_per_bit);
+                      mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
             if (thissad < bestsad) {
               bestsad = thissad;
               best_tr = tr;
@@ -957,8 +944,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
 
           if (thissad < bestsad) {
             const MV this_mv = {ref_row + tr, ref_col + tc + i};
-            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                      mvjsadcost, mvsadcost, sad_per_bit);
+            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
 
             if (thissad < bestsad) {
               bestsad = thissad;
@@ -979,7 +965,6 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
                              MV *ref_mv, MV *best_mv,
                              int search_param, int sad_per_bit, int *num00,
                              const vp9_variance_fn_ptr_t *fn_ptr,
-                             int *mvjcost, int *mvcost[2],
                              const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct buf_2d *const what = &x->plane[0].src;
@@ -991,23 +976,22 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
   const search_site *const ss = &x->ss[search_param * x->searches_per_step];
   const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-  const uint8_t *best_address;
+  const uint8_t *best_address, *in_what_ref;
   int best_sad = INT_MAX;
   int best_site = 0;
   int last_site = 0;
   int i, j, step;
 
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
-  best_address = get_buf_from_mv(in_what, ref_mv);
+  in_what_ref = get_buf_from_mv(in_what, ref_mv);
+  best_address = in_what_ref;
   *num00 = 0;
   *best_mv = *ref_mv;
 
   // Check the starting position
   best_sad = fn_ptr->sdf(what->buf, what->stride,
-                        in_what->buf, in_what->stride, 0x7fffffff) +
-      mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
+                         best_address, in_what->stride, 0x7fffffff) +
+      mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   i = 1;
 
@@ -1020,8 +1004,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
                              best_address + ss[i].offset, in_what->stride,
                              best_sad);
         if (sad < best_sad) {
-          sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
-                                sad_per_bit);
+          sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
           if (sad < best_sad) {
             best_sad = sad;
             best_site = i;
@@ -1046,8 +1029,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
                                 best_address + ss[best_site].offset,
                                 in_what->stride, best_sad);
           if (sad < best_sad) {
-            sad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                  mvjsadcost, mvsadcost, sad_per_bit);
+            sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
             if (sad < best_sad) {
               best_sad = sad;
               best_mv->row += ss[best_site].mv.row;
@@ -1060,7 +1042,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
         break;
       };
 #endif
-    } else if (best_address == in_what->buf) {
+    } else if (best_address == in_what_ref) {
       (*num00)++;
     }
   }
@@ -1071,7 +1053,6 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
                              MV *ref_mv, MV *best_mv, int search_param,
                              int sad_per_bit, int *num00,
                              const vp9_variance_fn_ptr_t *fn_ptr,
-                             int *mvjcost, int *mvcost[2],
                              const MV *center_mv) {
   int i, j, step;
 
@@ -1098,10 +1079,6 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
   const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
 
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
   ref_row = ref_mv->row;
   ref_col = ref_mv->col;
@@ -1115,8 +1092,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
 
   // Check the starting position
   bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
-                + mvsad_err_cost(best_mv, &fcenter_mv,
-                                 mvjsadcost, mvsadcost, sad_per_bit);
+                + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   i = 1;
 
@@ -1149,9 +1125,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
           if (sad_array[t] < bestsad) {
             const MV this_mv = {best_mv->row + ss[i].mv.row,
                                 best_mv->col + ss[i].mv.col};
-            sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                           mvjsadcost, mvsadcost, sad_per_bit);
-
+            sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
+                                           sad_per_bit);
             if (sad_array[t] < bestsad) {
               bestsad = sad_array[t];
               best_site = i;
@@ -1171,9 +1146,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
                                              in_what_stride, bestsad);
 
           if (thissad < bestsad) {
-            thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                      mvjsadcost, mvsadcost, sad_per_bit);
-
+            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
             if (thissad < bestsad) {
               bestsad = thissad;
               best_site = i;
@@ -1231,8 +1204,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
   int thissme, n, num00 = 0;
   int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
                                         step_param, sadpb, &n,
-                                        fn_ptr, x->nmvjointcost,
-                                        x->mvcost, ref_mv);
+                                        fn_ptr, ref_mv);
   if (bestsme < INT_MAX)
     bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
   *dst_mv = temp_mv;
@@ -1250,8 +1222,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
     } else {
       thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
                                         step_param + n, sadpb, &num00,
-                                        fn_ptr, x->nmvjointcost, x->mvcost,
-                                        ref_mv);
+                                        fn_ptr, ref_mv);
       if (thissme < INT_MAX)
         thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
 
@@ -1271,8 +1242,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
     const int search_range = 8;
     MV best_mv = *dst_mv;
     thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
-                                       fn_ptr, x->nmvjointcost, x->mvcost,
-                                       ref_mv);
+                                       fn_ptr, ref_mv);
     if (thissme < INT_MAX)
       thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
     if (thissme < bestsme) {
@@ -1286,7 +1256,6 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
 int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
                           const vp9_variance_fn_ptr_t *fn_ptr,
-                          int *mvjcost, int *mvcost[2],
                           const MV *center_mv, MV *best_mv) {
   int r, c;
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1296,12 +1265,10 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
   const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
   const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
   const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
   int best_sad = fn_ptr->sdf(what->buf, what->stride,
       get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
-      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
   *best_mv = *ref_mv;
 
   for (r = row_min; r < row_max; ++r) {
@@ -1309,9 +1276,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
       const MV mv = {r, c};
       const int sad = fn_ptr->sdf(what->buf, what->stride,
           get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
-          mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
-                         sad_per_bit);
-
+              mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
       if (sad < best_sad) {
         best_sad = sad;
         *best_mv = mv;
@@ -1324,7 +1289,6 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
 int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
                           const vp9_variance_fn_ptr_t *fn_ptr,
-                          int *mvjcost, int *mvcost[2],
                           const MV *center_mv, MV *best_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *const what = x->plane[0].src.buf;
@@ -1346,8 +1310,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
   const int col_max = MIN(ref_col + distance, x->mv_col_max);
   unsigned int sad_array[3];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
 
   // Work out the mid point for the search
   const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
@@ -1358,8 +1320,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
   // Baseline value at the centre
   bestsad = fn_ptr->sdf(what, what_stride,
                         bestaddress, in_what_stride, 0x7fffffff)
-            + mvsad_err_cost(best_mv, &fcenter_mv,
-                             mvjsadcost, mvsadcost, sad_per_bit);
+            + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   for (r = row_min; r < row_max; r++) {
     const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
@@ -1376,9 +1337,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
 
         if (thissad < bestsad) {
           this_mv.col = c;
-          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, sad_per_bit);
-
+          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
           if (thissad < bestsad) {
             bestsad = thissad;
             best_mv->row = r;
@@ -1396,9 +1355,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
 
       if (thissad < bestsad) {
         this_mv.col = c;
-        thissad  += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                   mvjsadcost, mvsadcost, sad_per_bit);
-
+        thissad  += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
         if (thissad < bestsad) {
           bestsad = thissad;
           best_mv->row = r;
@@ -1416,7 +1373,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
 int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
                           int sad_per_bit, int distance,
                           const vp9_variance_fn_ptr_t *fn_ptr,
-                          int *mvjcost, int *mvcost[2],
                           const MV *center_mv, MV *best_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const uint8_t *const what = x->plane[0].src.buf;
@@ -1439,9 +1395,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
   unsigned int sad_array[3];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
 
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   // Work out the mid point for the search
   const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
 
@@ -1451,8 +1404,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
   // Baseline value at the center
   bestsad = fn_ptr->sdf(what, what_stride,
                         bestaddress, in_what_stride, 0x7fffffff)
-            + mvsad_err_cost(best_mv, &fcenter_mv,
-                             mvjsadcost, mvsadcost, sad_per_bit);
+            + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
 
   for (r = row_min; r < row_max; r++) {
     const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
@@ -1469,9 +1421,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
 
         if (thissad < bestsad) {
           this_mv.col = c;
-          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, sad_per_bit);
-
+          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
           if (thissad < bestsad) {
             bestsad = thissad;
             best_mv->row = r;
@@ -1494,9 +1444,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
 
         if (thissad < bestsad) {
           this_mv.col = c;
-          thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, sad_per_bit);
-
+          thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
           if (thissad < bestsad) {
             bestsad = thissad;
             best_mv->row = r;
@@ -1515,9 +1463,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
 
       if (thissad < bestsad) {
         this_mv.col = c;
-        thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
-                                  mvjsadcost, mvsadcost, sad_per_bit);
-
+        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
         if (thissad < bestsad) {
           bestsad = thissad;
           best_mv->row = r;
@@ -1536,20 +1482,16 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
                               MV *ref_mv, int error_per_bit,
                               int search_range,
                               const vp9_variance_fn_ptr_t *fn_ptr,
-                              int *mvjcost, int *mvcost[2],
                               const MV *center_mv) {
   const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
   const MACROBLOCKD *const xd = &x->e_mbd;
   const struct buf_2d *const what = &x->plane[0].src;
   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
                                      get_buf_from_mv(in_what, ref_mv),
                                      in_what->stride, 0x7fffffff) +
-      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
   for (i = 0; i < search_range; i++) {
@@ -1562,8 +1504,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
         unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
             get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
         if (sad < best_sad) {
-          sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
-                                error_per_bit);
+          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
           if (sad < best_sad) {
             best_sad = sad;
             best_site = j;
@@ -1586,19 +1527,16 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
                               MV *ref_mv, int error_per_bit,
                               int search_range,
                               const vp9_variance_fn_ptr_t *fn_ptr,
-                              int *mvjcost, int *mvcost[2],
                               const MV *center_mv) {
   const MACROBLOCKD *const xd = &x->e_mbd;
   const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
   const struct buf_2d *const what = &x->plane[0].src;
   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
   unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
                                     in_what->stride, 0x7fffffff) +
-      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
   for (i = 0; i < search_range; i++) {
@@ -1623,9 +1561,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
         if (sads[j] < best_sad) {
           const MV mv = {ref_mv->row + neighbors[j].row,
                          ref_mv->col + neighbors[j].col};
-          sads[j] += mvsad_err_cost(&mv, &fcenter_mv,
-                                         mvjsadcost, mvsadcost, error_per_bit);
-
+          sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
           if (sads[j] < best_sad) {
             best_sad = sads[j];
             best_site = j;
@@ -1642,9 +1578,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
                                          get_buf_from_mv(in_what, &mv),
                                          in_what->stride, best_sad);
           if (sad < best_sad) {
-            sad += mvsad_err_cost(&mv, &fcenter_mv,
-                                  mvjsadcost, mvsadcost, error_per_bit);
-
+            sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
             if (sad < best_sad) {
               best_sad = sad;
               best_site = j;
@@ -1672,7 +1606,6 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
                              MV *ref_mv, int error_per_bit,
                              int search_range,
                              const vp9_variance_fn_ptr_t *fn_ptr,
-                             int *mvjcost, int *mvcost[2],
                              const MV *center_mv,
                              const uint8_t *second_pred, int w, int h) {
   const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
@@ -1681,12 +1614,10 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
   const struct buf_2d *const what = &x->plane[0].src;
   const struct buf_2d *const in_what = &xd->plane[0].pre[0];
   const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-  const int *mvjsadcost = x->nmvjointsadcost;
-  int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
   unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
       get_buf_from_mv(in_what, ref_mv), in_what->stride,
       second_pred, 0x7fffffff) +
-      mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
   int i, j;
 
   for (i = 0; i < search_range; ++i) {
@@ -1701,8 +1632,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
             get_buf_from_mv(in_what, &mv), in_what->stride,
             second_pred, best_sad);
         if (sad < best_sad) {
-          sad += mvsad_err_cost(&mv, &fcenter_mv,
-                                    mvjsadcost, mvsadcost, error_per_bit);
+          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
           if (sad < best_sad) {
             best_sad = sad;
             best_site = j;
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index f7b7c5e49..70d7985e4 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -110,14 +110,12 @@ typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
                                     const MV *ref_mv, int sad_per_bit,
                                     int distance,
                                     const vp9_variance_fn_ptr_t *fn_ptr,
-                                    int *mvjcost, int *mvcost[2],
                                     const MV *center_mv, MV *best_mv);
 
 typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x,
                                         MV *ref_mv, int sad_per_bit,
                                         int distance,
                                         const vp9_variance_fn_ptr_t *fn_ptr,
-                                        int *mvjcost, int *mvcost[2],
                                         const MV *center_mv);
 
 typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
@@ -125,14 +123,12 @@ typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
                                        int search_param, int sad_per_bit,
                                        int *num00,
                                        const vp9_variance_fn_ptr_t *fn_ptr,
-                                       int *mvjcost, int *mvcost[2],
                                        const MV *center_mv);
 
 int vp9_refining_search_8p_c(const MACROBLOCK *x,
                              MV *ref_mv, int error_per_bit,
                              int search_range,
                              const vp9_variance_fn_ptr_t *fn_ptr,
-                             int *mvjcost, int *mvcost[2],
                              const MV *center_mv, const uint8_t *second_pred,
                              int w, int h);
 #ifdef __cplusplus
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 0ac9d5f05..03f3c87a2 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -61,11 +61,6 @@ void vp9_coef_tree_initialize();
                                          // now so that HIGH_PRECISION is always
                                          // chosen.
 
-// Max rate target for 1080P and below encodes under normal circumstances
-// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
-#define MAX_MB_RATE 250
-#define MAXRATE_1080P 2025000
-
 // #define OUTPUT_YUV_REC
 
 #ifdef OUTPUT_YUV_SRC
@@ -386,117 +381,119 @@ static int is_slowest_mode(int mode) {
 
 static void set_rd_speed_thresholds(VP9_COMP *cpi) {
   int i;
+  RD_OPT *const rd = &cpi->rd;
 
   // Set baseline threshold values
   for (i = 0; i < MAX_MODES; ++i)
-  cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
-
-  cpi->rd_thresh_mult[THR_NEARESTMV] = 0;
-  cpi->rd_thresh_mult[THR_NEARESTG] = 0;
-  cpi->rd_thresh_mult[THR_NEARESTA] = 0;
-
-  cpi->rd_thresh_mult[THR_DC] += 1000;
-
-  cpi->rd_thresh_mult[THR_NEWMV] += 1000;
-  cpi->rd_thresh_mult[THR_NEWA] += 1000;
-  cpi->rd_thresh_mult[THR_NEWG] += 1000;
-
-  cpi->rd_thresh_mult[THR_NEARMV] += 1000;
-  cpi->rd_thresh_mult[THR_NEARA] += 1000;
-  cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000;
-  cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000;
-
-  cpi->rd_thresh_mult[THR_TM] += 1000;
-
-  cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500;
-  cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000;
-  cpi->rd_thresh_mult[THR_NEARG] += 1000;
-  cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500;
-  cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000;
-
-  cpi->rd_thresh_mult[THR_ZEROMV] += 2000;
-  cpi->rd_thresh_mult[THR_ZEROG] += 2000;
-  cpi->rd_thresh_mult[THR_ZEROA] += 2000;
-  cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500;
-  cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500;
-
-  cpi->rd_thresh_mult[THR_H_PRED] += 2000;
-  cpi->rd_thresh_mult[THR_V_PRED] += 2000;
-  cpi->rd_thresh_mult[THR_D45_PRED ] += 2500;
-  cpi->rd_thresh_mult[THR_D135_PRED] += 2500;
-  cpi->rd_thresh_mult[THR_D117_PRED] += 2500;
-  cpi->rd_thresh_mult[THR_D153_PRED] += 2500;
-  cpi->rd_thresh_mult[THR_D207_PRED] += 2500;
-  cpi->rd_thresh_mult[THR_D63_PRED] += 2500;
+    rd->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
+
+  rd->thresh_mult[THR_NEARESTMV] = 0;
+  rd->thresh_mult[THR_NEARESTG] = 0;
+  rd->thresh_mult[THR_NEARESTA] = 0;
+
+  rd->thresh_mult[THR_DC] += 1000;
+
+  rd->thresh_mult[THR_NEWMV] += 1000;
+  rd->thresh_mult[THR_NEWA] += 1000;
+  rd->thresh_mult[THR_NEWG] += 1000;
+
+  rd->thresh_mult[THR_NEARMV] += 1000;
+  rd->thresh_mult[THR_NEARA] += 1000;
+  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
+  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
+
+  rd->thresh_mult[THR_TM] += 1000;
+
+  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
+  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
+  rd->thresh_mult[THR_NEARG] += 1000;
+  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
+  rd->thresh_mult[THR_COMP_NEWGA] += 2000;
+
+  rd->thresh_mult[THR_ZEROMV] += 2000;
+  rd->thresh_mult[THR_ZEROG] += 2000;
+  rd->thresh_mult[THR_ZEROA] += 2000;
+  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
+  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
+
+  rd->thresh_mult[THR_H_PRED] += 2000;
+  rd->thresh_mult[THR_V_PRED] += 2000;
+  rd->thresh_mult[THR_D45_PRED ] += 2500;
+  rd->thresh_mult[THR_D135_PRED] += 2500;
+  rd->thresh_mult[THR_D117_PRED] += 2500;
+  rd->thresh_mult[THR_D153_PRED] += 2500;
+  rd->thresh_mult[THR_D207_PRED] += 2500;
+  rd->thresh_mult[THR_D63_PRED] += 2500;
 
   /* disable frame modes if flags not set */
   if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
-    cpi->rd_thresh_mult[THR_NEWMV    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX;
-    cpi->rd_thresh_mult[THR_ZEROMV   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEARMV   ] = INT_MAX;
+    rd->thresh_mult[THR_NEWMV    ] = INT_MAX;
+    rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
+    rd->thresh_mult[THR_ZEROMV   ] = INT_MAX;
+    rd->thresh_mult[THR_NEARMV   ] = INT_MAX;
   }
   if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
-    cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_ZEROG    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEARG    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEWG     ] = INT_MAX;
+    rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
+    rd->thresh_mult[THR_ZEROG    ] = INT_MAX;
+    rd->thresh_mult[THR_NEARG    ] = INT_MAX;
+    rd->thresh_mult[THR_NEWG     ] = INT_MAX;
   }
   if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
-    cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_ZEROA    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEARA    ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_NEWA     ] = INT_MAX;
+    rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
+    rd->thresh_mult[THR_ZEROA    ] = INT_MAX;
+    rd->thresh_mult[THR_NEARA    ] = INT_MAX;
+    rd->thresh_mult[THR_NEWA     ] = INT_MAX;
   }
 
   if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
       (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
-    cpi->rd_thresh_mult[THR_COMP_ZEROLA   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEARLA   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEWLA    ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_ZEROLA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARLA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEWLA    ] = INT_MAX;
   }
   if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
       (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
-    cpi->rd_thresh_mult[THR_COMP_ZEROGA   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEARGA   ] = INT_MAX;
-    cpi->rd_thresh_mult[THR_COMP_NEWGA    ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_ZEROGA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEARGA   ] = INT_MAX;
+    rd->thresh_mult[THR_COMP_NEWGA    ] = INT_MAX;
   }
 }
 
 static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
   const SPEED_FEATURES *const sf = &cpi->sf;
+  RD_OPT *const rd = &cpi->rd;
   int i;
 
   for (i = 0; i < MAX_REFS; ++i)
-    cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode)  ? -500 : 0;
+    rd->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode)  ? -500 : 0;
 
-  cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500;
-  cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500;
-  cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500;
-  cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500;
-  cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500;
-  cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500;
+  rd->thresh_mult_sub8x8[THR_LAST] += 2500;
+  rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
+  rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
+  rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
+  rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
+  rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
 
   // Check for masked out split cases.
   for (i = 0; i < MAX_REFS; i++)
     if (sf->disable_split_mask & (1 << i))
-      cpi->rd_thresh_mult_sub8x8[i] = INT_MAX;
+      rd->thresh_mult_sub8x8[i] = INT_MAX;
 
   // disable mode test if frame flag is not set
   if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
   if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
   if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
   if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
       (VP9_LAST_FLAG | VP9_ALT_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
   if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
       (VP9_GOLD_FLAG | VP9_ALT_FLAG))
-    cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
+    rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
 }
 
 static void set_speed_features(VP9_COMP *cpi) {
@@ -627,50 +624,8 @@ static void update_frame_size(VP9_COMP *cpi) {
 }
 
 void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
-  VP9_COMMON *const cm = &cpi->common;
-  RATE_CONTROL *const rc = &cpi->rc;
-  VP9_CONFIG *const oxcf = &cpi->oxcf;
-  int vbr_max_bits;
-
-  oxcf->framerate = framerate < 0.1 ? 30 : framerate;
-  cpi->output_framerate = cpi->oxcf.framerate;
-  rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth /
-                                     cpi->output_framerate);
-  rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth *
-                                  oxcf->two_pass_vbrmin_section / 100);
-
-  rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
-
-  // A maximum bitrate for a frame is defined.
-  // The baseline for this aligns with HW implementations that
-  // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
-  // per 16x16 MB (averaged over a frame). However this limit is extended if
-  // a very high rate is given on the command line or the the rate cannnot
-  // be acheived because of a user specificed max q (e.g. when the user
-  // specifies lossless encode.
-  //
-  vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth *
-      oxcf->two_pass_vbrmax_section) / 100);
-  rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
-                                vbr_max_bits);
-
-  // Set Maximum gf/arf interval
-  rc->max_gf_interval = 16;
-
-  // Extended interval for genuinely static scenes
-  rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
-
-  // Special conditions when alt ref frame enabled in lagged compress mode
-  if (oxcf->play_alternate && oxcf->lag_in_frames) {
-    if (rc->max_gf_interval > oxcf->lag_in_frames - 1)
-      rc->max_gf_interval = oxcf->lag_in_frames - 1;
-
-    if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
-      rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
-  }
-
-  if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
-    rc->max_gf_interval = rc->static_scene_max_gf_interval;
+  cpi->oxcf.framerate = framerate < 0.1 ? 30 : framerate;
+  vp9_rc_update_framerate(cpi);
 }
 
 int64_t vp9_rescale(int64_t val, int64_t num, int denom) {
@@ -860,10 +815,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
   cm->display_width = cpi->oxcf.width;
   cm->display_height = cpi->oxcf.height;
 
-  // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
-  cpi->oxcf.sharpness = MIN(7, cpi->oxcf.sharpness);
-
-  cpi->common.lf.sharpness_level = cpi->oxcf.sharpness;
+  cm->lf.sharpness_level = cpi->oxcf.sharpness;
 
   if (cpi->initial_width) {
     // Increasing the size of the frame beyond the first seen frame, or some
@@ -883,10 +835,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
 
   cpi->speed = abs(cpi->oxcf.cpu_used);
 
-  // Limit on lag buffers as these are not currently dynamically allocated.
-  if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
-    cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
-
 #if CONFIG_MULTIPLE_ARF
   vp9_zero(cpi->alt_ref_source);
 #else
@@ -906,7 +854,9 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
   cpi->ext_refresh_frame_context_pending = 0;
 }
 
+#ifndef M_LOG2_E
 #define M_LOG2_E 0.693147180559945309417
+#endif
 #define log2f(x) (log (x) / (float) M_LOG2_E)
 
 static void cal_nmvjointsadcost(int *mvjointsadcost) {
@@ -1275,9 +1225,9 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
   // Default rd threshold factors for mode selection
   for (i = 0; i < BLOCK_SIZES; ++i) {
     for (j = 0; j < MAX_MODES; ++j)
-      cpi->rd_thresh_freq_fact[i][j] = 32;
+      cpi->rd.thresh_freq_fact[i][j] = 32;
     for (j = 0; j < MAX_REFS; ++j)
-      cpi->rd_thresh_freq_sub8x8[i][j] = 32;
+      cpi->rd.thresh_freq_sub8x8[i][j] = 32;
   }
 
 #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \
@@ -1386,8 +1336,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
 
   cm->error.setjmp = 0;
 
-  vp9_zero(cpi->common.counts.uv_mode);
-
 #ifdef MODE_TEST_HIT_STATS
   vp9_zero(cpi->mode_test_hits);
 #endif
@@ -1787,7 +1735,9 @@ static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb,
                        dsts[i], out_h_uv, out_w_uv, dst_strides[i]);
     }
   }
-  vp8_yv12_extend_frame_borders(dst_fb);
+  // TODO(hkuang): Call the C version explicitly,
+  // as the neon version only expands a border of size 32.
+  vp8_yv12_extend_frame_borders_c(dst_fb);
 }
 
 static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
@@ -1828,7 +1778,9 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
     }
   }
 
-  vp8_yv12_extend_frame_borders(dst_fb);
+  // TODO(hkuang): Call the C version explicitly,
+  // as the neon version only expands a border of size 32.
+  vp8_yv12_extend_frame_borders_c(dst_fb);
 }
 
 static int find_fp_qindex() {
@@ -2538,7 +2490,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
 
   vp9_clear_system_state();
 
-  vp9_zero(cpi->rd_tx_select_threshes);
+  vp9_zero(cpi->rd.tx_select_threshes);
 
 #if CONFIG_VP9_POSTPROC
   if (cpi->oxcf.noise_sensitivity > 0) {
@@ -2647,22 +2599,23 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   output_frame_level_debug_stats(cpi);
 #endif
   if (cpi->refresh_golden_frame == 1)
-    cm->frame_flags |= FRAMEFLAGS_GOLDEN;
+    cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
   else
-    cm->frame_flags &= ~FRAMEFLAGS_GOLDEN;
+    cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
 
   if (cpi->refresh_alt_ref_frame == 1)
-    cm->frame_flags |= FRAMEFLAGS_ALTREF;
+    cpi->frame_flags |= FRAMEFLAGS_ALTREF;
   else
-    cm->frame_flags &= ~FRAMEFLAGS_ALTREF;
+    cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
 
   get_ref_frame_flags(cpi);
 
+  cm->last_frame_type = cm->frame_type;
   vp9_rc_postencode_update(cpi, *size);
 
   if (cm->frame_type == KEY_FRAME) {
     // Tell the caller that the frame was coded as a key frame
-    *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY;
+    *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
 
 #if CONFIG_MULTIPLE_ARF
     // Reset the sequence number.
@@ -2673,7 +2626,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     }
 #endif
   } else {
-    *frame_flags = cm->frame_flags&~FRAMEFLAGS_KEY;
+    *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
 
 #if CONFIG_MULTIPLE_ARF
     /* Increment position in the coded frame sequence. */
@@ -3042,7 +2995,16 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   }
 #endif
 
-  cm->frame_flags = *frame_flags;
+  cpi->frame_flags = *frame_flags;
+
+  if (cpi->pass == 2 &&
+      cm->current_video_frame == 0 &&
+      cpi->oxcf.allow_spatial_resampling &&
+      cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) {
+    // Internal scaling is triggered on the first frame.
+    vp9_set_size_literal(cpi, cpi->oxcf.scaled_frame_width,
+                         cpi->oxcf.scaled_frame_height);
+  }
 
   // Reset the frame pointers to the current frame size
   vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 18203f96a..7a110ac4c 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -185,6 +185,7 @@ typedef enum {
   AQ_MODE_COUNT  // This should always be the last member of the enum
 } AQ_MODE;
 
+
 typedef struct VP9_CONFIG {
   BITSTREAM_PROFILE profile;
   BIT_DEPTH bit_depth;
@@ -231,6 +232,11 @@ typedef struct VP9_CONFIG {
   int lossless;
   AQ_MODE aq_mode;  // Adaptive Quantization mode
 
+  // Internal frame size scaling.
+  int allow_spatial_resampling;
+  int scaled_frame_width;
+  int scaled_frame_height;
+
   // Enable feature to reduce the frame quantization every x frames.
   int frame_periodic_boost;
 
@@ -281,6 +287,35 @@ typedef struct VP9_CONFIG {
   vp8e_tuning tuning;
 } VP9_CONFIG;
 
+
+typedef struct RD_OPT {
+  // Thresh_mult is used to set a threshold for the rd score. A higher value
+  // means that we will accept the best mode so far more often. This number
+  // is used in combination with the current block size, and thresh_freq_fact
+  // to pick a threshold.
+  int thresh_mult[MAX_MODES];
+  int thresh_mult_sub8x8[MAX_REFS];
+
+  int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
+  int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
+  int thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS];
+  int thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS];
+
+  int64_t comp_pred_diff[REFERENCE_MODES];
+  int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
+  int64_t tx_select_diff[TX_MODES];
+  // FIXME(rbultje) can this overflow?
+  int tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
+
+  int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
+  int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+  int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
+  int64_t mask_filter;
+
+  int RDMULT;
+  int RDDIV;
+} RD_OPT;
+
 typedef struct VP9_COMP {
   QUANTS quants;
   MACROBLOCK mb;
@@ -343,31 +378,7 @@ typedef struct VP9_COMP {
   // Ambient reconstruction err target for force key frames
   int ambient_err;
 
-  // Thresh_mult is used to set a threshold for the rd score. A higher value
-  // means that we will accept the best mode so far more often. This number
-  // is used in combination with the current block size, and thresh_freq_fact
-  // to pick a threshold.
-  int rd_thresh_mult[MAX_MODES];
-  int rd_thresh_mult_sub8x8[MAX_REFS];
-
-  int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
-  int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
-  int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS];
-  int rd_thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS];
-
-  int64_t rd_comp_pred_diff[REFERENCE_MODES];
-  int64_t rd_prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
-  int64_t rd_tx_select_diff[TX_MODES];
-  // FIXME(rbultje) can this overflow?
-  int rd_tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
-
-  int64_t rd_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
-  int64_t rd_filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
-  int64_t rd_filter_cache[SWITCHABLE_FILTER_CONTEXTS];
-  int64_t mask_filter_rd;
-
-  int RDMULT;
-  int RDDIV;
+  RD_OPT rd;
 
   CODING_CONTEXT coding_context;
 
@@ -376,7 +387,6 @@ typedef struct VP9_COMP {
   int active_arnr_frames;           // <= cpi->oxcf.arnr_max_frames
   int active_arnr_strength;         // <= cpi->oxcf.arnr_max_strength
 
-  double output_framerate;
   int64_t last_time_stamp_seen;
   int64_t last_end_time_stamp_seen;
   int64_t first_time_stamp_ever;
@@ -498,6 +508,8 @@ typedef struct VP9_COMP {
 
   int use_large_partition_rate;
 
+  int frame_flags;
+
 #if CONFIG_MULTIPLE_ARF
   // ARF tracking variables.
   int multi_arf_enabled;
@@ -591,7 +603,7 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
 
 // Intra only frames, golden frames (except alt ref overlays) and
 // alt ref frames tend to be coded at a higher than ambient quality
-static INLINE int vp9_frame_is_boosted(const VP9_COMP *cpi) {
+static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
   return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
          (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
 }
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 3ac85228b..e003a0f42 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -23,7 +23,7 @@
 #include "vp9/encoder/vp9_picklpf.h"
 #include "vp9/encoder/vp9_quantize.h"
 
-static int get_max_filter_level(VP9_COMP *cpi) {
+static int get_max_filter_level(const VP9_COMP *cpi) {
   return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
                                                : MAX_LOOP_FILTER;
 }
@@ -43,15 +43,15 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
   return filt_err;
 }
 
-static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
-                                int partial_frame) {
-  VP9_COMMON *const cm = &cpi->common;
-  struct loopfilter *const lf = &cm->lf;
+static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
+                               int partial_frame) {
+  const VP9_COMMON *const cm = &cpi->common;
+  const struct loopfilter *const lf = &cm->lf;
   const int min_filter_level = 0;
   const int max_filter_level = get_max_filter_level(cpi);
-  int best_err;
-  int filt_best;
   int filt_direction = 0;
+  int best_err, filt_best;
+
   // Start the search at the previous frame filter level unless it is now out of
   // range.
   int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
@@ -128,7 +128,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
     }
   }
 
-  lf->filter_level = filt_best;
+  return filt_best;
 }
 
 void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
@@ -150,6 +150,7 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
       filt_guess -= 4;
     lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
   } else {
-    search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE);
+    lf->filter_level = search_filter_level(sd, cpi,
+                                           method == LPF_PICK_FROM_SUBIMAGE);
   }
 }
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index f3fe99cdb..3d398edc9 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -233,8 +233,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   const int64_t intra_mode_cost = 50;
 
   unsigned char segment_id = mbmi->segment_id;
-  const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
-  const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
+  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
+  const int *const rd_thresh_freq_fact = cpi->rd.thresh_freq_fact[bsize];
   // Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame.
   int mode_idx[MB_MODE_COUNT] = {0};
   INTERP_FILTER filter_ref = SWITCHABLE;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 342081644..76ec84b5f 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -27,6 +27,11 @@
 #include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/encoder/vp9_ratectrl.h"
 
+// Max rate target for 1080P and below encodes under normal circumstances
+// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
+#define MAX_MB_RATE 250
+#define MAXRATE_1080P 2025000
+
 #define DEFAULT_KF_BOOST 2000
 #define DEFAULT_GF_BOOST 2000
 
@@ -74,14 +79,13 @@ void vp9_rc_init_minq_luts() {
 
   for (i = 0; i < QINDEX_RANGE; i++) {
     const double maxq = vp9_convert_qindex_to_q(i);
-
     kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15);
     kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50);
     gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32);
     gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
     afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33);
     afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55);
-    inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75);
+    inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.55);
   }
 }
 
@@ -367,8 +371,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
 
   // Calculate required scaling factor based on target frame size and size of
   // frame produced using previous Q.
-    target_bits_per_mb =
-        ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
+  target_bits_per_mb =
+      ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
 
   i = active_best_quality;
 
@@ -565,11 +569,18 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
 
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
   // Limit Q range for the adaptive loop.
-  if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
-    if (!(cm->current_video_frame == 0))
-      *top_index = (active_worst_quality + active_best_quality * 3) / 4;
+  if (cm->frame_type == KEY_FRAME &&
+      !rc->this_key_frame_forced  &&
+      !(cm->current_video_frame == 0)) {
+    int qdelta = 0;
+    vp9_clear_system_state();
+    qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                        active_worst_quality, 2.0);
+    *top_index = active_worst_quality + qdelta;
+    *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
   }
 #endif
+
   // Special case code to try and match quality with forced key frames
   if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
     q = rc->last_boosted_qindex;
@@ -725,15 +736,26 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
   *bottom_index = active_best_quality;
 
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
-  // Limit Q range for the adaptive loop.
-  if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
-    if (!(cm->current_video_frame == 0))
-      *top_index = (active_worst_quality + active_best_quality * 3) / 4;
-  } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-    *top_index = (active_worst_quality + active_best_quality) / 2;
+  {
+    int qdelta = 0;
+    vp9_clear_system_state();
+
+    // Limit Q range for the adaptive loop.
+    if (cm->frame_type == KEY_FRAME &&
+        !rc->this_key_frame_forced &&
+        !(cm->current_video_frame == 0)) {
+      qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                          active_worst_quality, 2.0);
+    } else if (!rc->is_src_frame_alt_ref &&
+               (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+      qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                          active_worst_quality, 1.75);
+    }
+    *top_index = active_worst_quality + qdelta;
+    *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
   }
 #endif
+
   if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
     q = active_best_quality;
   // Special case code to try and match quality with forced key frames
@@ -907,13 +929,22 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
   *bottom_index = active_best_quality;
 
 #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
-  // Limit Q range for the adaptive loop.
-  if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
-    *top_index = (active_worst_quality + active_best_quality * 3) / 4;
-  } else if (!rc->is_src_frame_alt_ref &&
-             (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
-    *top_index = (active_worst_quality + active_best_quality) / 2;
+  {
+    int qdelta = 0;
+    vp9_clear_system_state();
+
+    // Limit Q range for the adaptive loop.
+    if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
+      qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                          active_worst_quality, 2.0);
+    } else if (!rc->is_src_frame_alt_ref &&
+               (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
+               (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+      qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+                                          active_worst_quality, 1.75);
+    }
+    *top_index = active_worst_quality + qdelta;
+    *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
   }
 #endif
 
@@ -1065,11 +1096,11 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
 }
 
 void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
-  VP9_COMMON *const cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
   const VP9_CONFIG *const oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
+  const int qindex = cm->base_qindex;
 
-  cm->last_frame_type = cm->frame_type;
   // Update rate control heuristics
   rc->projected_frame_size = (int)(bytes_used << 3);
 
@@ -1080,25 +1111,24 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
 
   // Keep a record of last Q and ambient average Q.
   if (cm->frame_type == KEY_FRAME) {
-    rc->last_q[KEY_FRAME] = cm->base_qindex;
-    rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(
-        3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2);
+    rc->last_q[KEY_FRAME] = qindex;
+    rc->avg_frame_qindex[KEY_FRAME] =
+        ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
   } else if (!rc->is_src_frame_alt_ref &&
-      (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
-      !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) {
-    rc->last_q[2] = cm->base_qindex;
-    rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(
-        3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2);
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
+             !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) {
+    rc->last_q[2] = qindex;
+    rc->avg_frame_qindex[2] =
+        ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2);
   } else {
-    rc->last_q[INTER_FRAME] = cm->base_qindex;
-    rc->avg_frame_qindex[INTER_FRAME] = ROUND_POWER_OF_TWO(
-        3 * rc->avg_frame_qindex[INTER_FRAME] + cm->base_qindex, 2);
+    rc->last_q[INTER_FRAME] = qindex;
+    rc->avg_frame_qindex[INTER_FRAME] =
+        ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
     rc->ni_frames++;
-    rc->tot_q += vp9_convert_qindex_to_q(cm->base_qindex);
-    rc->avg_q = rc->tot_q / (double)rc->ni_frames;
-
+    rc->tot_q += vp9_convert_qindex_to_q(qindex);
+    rc->avg_q = rc->tot_q / rc->ni_frames;
     // Calculate the average Q for normal inter frames (not key or GFU frames).
-    rc->ni_tot_qi += cm->base_qindex;
+    rc->ni_tot_qi += qindex;
     rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
   }
 
@@ -1107,11 +1137,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
   // If all mbs in this group are skipped only update if the Q value is
   // better than that already stored.
   // This is used to help set quality in forced key frames to reduce popping
-  if ((cm->base_qindex < rc->last_boosted_qindex) ||
+  if ((qindex < rc->last_boosted_qindex) ||
       ((cpi->static_mb_pct < 100) &&
        ((cm->frame_type == KEY_FRAME) || cpi->refresh_alt_ref_frame ||
         (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
-    rc->last_boosted_qindex = cm->base_qindex;
+    rc->last_boosted_qindex = qindex;
   }
 
   update_buffer_level(cpi, rc->projected_frame_size);
@@ -1196,7 +1226,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
   int target;
   if (!cpi->refresh_alt_ref_frame &&
       (cm->current_video_frame == 0 ||
-       (cm->frame_flags & FRAMEFLAGS_KEY) ||
+       (cpi->frame_flags & FRAMEFLAGS_KEY) ||
        rc->frames_to_key == 0 ||
        (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
     cm->frame_type = KEY_FRAME;
@@ -1258,17 +1288,25 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
 
 static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
   const RATE_CONTROL *rc = &cpi->rc;
+  const VP9_CONFIG *oxcf = &cpi->oxcf;
+  const SVC *const svc = &cpi->svc;
   int target;
-
   if (cpi->common.current_video_frame == 0) {
     target = ((cpi->oxcf.starting_buffer_level / 2) > INT_MAX)
       ? INT_MAX : (int)(cpi->oxcf.starting_buffer_level / 2);
   } else {
-    const int initial_boost = 32;
-    int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
-    if (rc->frames_since_key < cpi->output_framerate / 2) {
+    int kf_boost = 32;
+    double framerate = oxcf->framerate;
+    if (svc->number_temporal_layers > 1 &&
+        oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+      // Use the layer framerate for temporal layers CBR mode.
+      const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id];
+      framerate = lc->framerate;
+    }
+    kf_boost = MAX(kf_boost, (int)(2 * framerate - 16));
+    if (rc->frames_since_key <  framerate / 2) {
       kf_boost = (int)(kf_boost * rc->frames_since_key /
-                       (cpi->output_framerate / 2));
+                       (framerate / 2));
     }
     target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
   }
@@ -1280,7 +1318,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
   RATE_CONTROL *const rc = &cpi->rc;
   int target = rc->av_per_frame_bandwidth;
   if ((cm->current_video_frame == 0) ||
-      (cm->frame_flags & FRAMEFLAGS_KEY) ||
+      (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       (cpi->oxcf.auto_key && (rc->frames_since_key %
                               cpi->key_frame_frequency == 0))) {
     cm->frame_type = KEY_FRAME;
@@ -1304,7 +1342,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
   RATE_CONTROL *const rc = &cpi->rc;
   int target;
   if ((cm->current_video_frame == 0 ||
-      (cm->frame_flags & FRAMEFLAGS_KEY) ||
+      (cpi->frame_flags & FRAMEFLAGS_KEY) ||
       rc->frames_to_key == 0 ||
       (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
     cm->frame_type = KEY_FRAME;
@@ -1366,3 +1404,46 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
 
   return target_index - qindex;
 }
+
+void vp9_rc_update_framerate(VP9_COMP *cpi) {
+  const VP9_COMMON *const cm = &cpi->common;
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
+  RATE_CONTROL *const rc = &cpi->rc;
+  int vbr_max_bits;
+
+  rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / oxcf->framerate);
+  rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth *
+                                oxcf->two_pass_vbrmin_section / 100);
+
+  rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
+
+  // A maximum bitrate for a frame is defined.
+  // The baseline for this aligns with HW implementations that
+  // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
+  // per 16x16 MB (averaged over a frame). However this limit is extended if
+  // a very high rate is given on the command line or the rate cannot
+  // be achieved because of a user specified max q (e.g. when the user
+  // specifies lossless encode).
+  vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth *
+                     oxcf->two_pass_vbrmax_section) / 100);
+  rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
+                                    vbr_max_bits);
+
+  // Set Maximum gf/arf interval
+  rc->max_gf_interval = 16;
+
+  // Extended interval for genuinely static scenes
+  rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
+
+  // Special conditions when alt ref frame enabled in lagged compress mode
+  if (oxcf->play_alternate && oxcf->lag_in_frames) {
+    if (rc->max_gf_interval > oxcf->lag_in_frames - 1)
+      rc->max_gf_interval = oxcf->lag_in_frames - 1;
+
+    if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+      rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+  }
+
+  if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
+    rc->max_gf_interval = rc->static_scene_max_gf_interval;
+}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 7693c2b13..cf6526b8b 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -125,8 +125,7 @@ void vp9_rc_get_svc_params(struct VP9_COMP *cpi);
 
 // Post encode update of the rate control parameters based
 // on bytes used
-void vp9_rc_postencode_update(struct VP9_COMP *cpi,
-                              uint64_t bytes_used);
+void vp9_rc_postencode_update(struct VP9_COMP *cpi, uint64_t bytes_used);
 // Post encode update of the rate control parameters for dropped frames
 void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi);
 
@@ -175,6 +174,8 @@ int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget);
 int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
                                int qindex, double rate_target_ratio);
 
+void vp9_rc_update_framerate(struct VP9_COMP *cpi);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 1d70c1f9e..7ef21fa5d 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -81,7 +81,7 @@ struct rdcost_block_args {
   const scan_order *so;
 };
 
-const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
+static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {NEARESTMV, {LAST_FRAME,   NONE}},
   {NEARESTMV, {ALTREF_FRAME, NONE}},
   {NEARESTMV, {GOLDEN_FRAME, NONE}},
@@ -121,7 +121,7 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
   {D45_PRED,  {INTRA_FRAME,  NONE}},
 };
 
-const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
+static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
   {{LAST_FRAME,   NONE}},
   {{GOLDEN_FRAME, NONE}},
   {{ALTREF_FRAME, NONE}},
@@ -134,8 +134,9 @@ const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
 // certain modes are assumed to be based on 8x8 blocks.
 // This table is used to correct for blocks size.
 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
-static int rd_thresh_block_size_factor[BLOCK_SIZES] =
-  {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
+static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
+  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
+};
 
 static int raster_block_offset(BLOCK_SIZE plane_bsize,
                                int raster_block, int stride) {
@@ -192,7 +193,7 @@ static void fill_token_costs(vp9_coeff_cost *c,
           }
 }
 
-static const int rd_iifactor[32] = {
+static const uint8_t rd_iifactor[32] = {
   4, 4, 3, 2, 1, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0,
@@ -244,6 +245,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
 
 static void set_block_thresholds(VP9_COMP *cpi) {
   const VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd = &cpi->rd;
   int i, bsize, segment_id;
 
   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
@@ -259,14 +261,14 @@ static void set_block_thresholds(VP9_COMP *cpi) {
       const int thresh_max = INT_MAX / t;
 
       for (i = 0; i < MAX_MODES; ++i)
-        cpi->rd_threshes[segment_id][bsize][i] =
-            cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4
+        rd->threshes[segment_id][bsize][i] =
+            rd->thresh_mult[i] < thresh_max ? rd->thresh_mult[i] * t / 4
                                             : INT_MAX;
 
       for (i = 0; i < MAX_REFS; ++i) {
-        cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
-            cpi->rd_thresh_mult_sub8x8[i] < thresh_max
-                ? cpi->rd_thresh_mult_sub8x8[i] * t / 4
+        rd->thresh_sub8x8[segment_id][bsize][i] =
+            rd->thresh_mult_sub8x8[i] < thresh_max
+                ? rd->thresh_mult_sub8x8[i] * t / 4
                 : INT_MAX;
       }
     }
@@ -280,10 +282,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
 
   vp9_clear_system_state();
 
-  cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
-  cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
+  cpi->rd.RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
+  cpi->rd.RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
 
-  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
+  x->errorperbit = cpi->rd.RDMULT / RD_MULT_EPB_RATIO;
   x->errorperbit += (x->errorperbit == 0);
 
   x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
@@ -1675,14 +1677,14 @@ static INLINE int mv_has_subpel(const MV *mv) {
 static int check_best_zero_mv(
     const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
     int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
-    int disable_inter_mode_mask, int this_mode, int ref_frame,
-    int second_ref_frame) {
+    int disable_inter_mode_mask, int this_mode,
+    const MV_REFERENCE_FRAME ref_frames[2]) {
   if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
       (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
-      frame_mv[this_mode][ref_frame].as_int == 0 &&
-      (second_ref_frame == NONE ||
-       frame_mv[this_mode][second_ref_frame].as_int == 0)) {
-    int rfc = mode_context[ref_frame];
+      frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
+      (ref_frames[1] == NONE ||
+       frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
+    int rfc = mode_context[ref_frames[0]];
     int c1 = cost_mv_ref(cpi, NEARMV, rfc);
     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
@@ -1693,15 +1695,15 @@ static int check_best_zero_mv(
       if (c2 > c3) return 0;
     } else {
       assert(this_mode == ZEROMV);
-      if (second_ref_frame == NONE) {
-        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
-            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0))
+      if (ref_frames[1] == NONE) {
+        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
+            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
           return 0;
       } else {
-        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
-             frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
-            (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 &&
-             frame_mv[NEARMV][second_ref_frame].as_int == 0))
+        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
+             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
+            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
+             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
           return 0;
       }
     }
@@ -1779,8 +1781,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
 
         if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
                                 disable_inter_mode_mask,
-                                this_mode, mbmi->ref_frame[0],
-                                mbmi->ref_frame[1]))
+                                this_mode, mbmi->ref_frame))
           continue;
 
         vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
@@ -1890,7 +1891,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                      x->mv_row_min, x->mv_row_max);
             thissme = cpi->full_search_sad(x, &mvp_full,
                                            sadpb, 16, v_fn_ptr,
-                                           x->nmvjointcost, x->mvcost,
                                            &bsi->ref_mv[0]->as_mv,
                                            &best_mv->as_mv);
             if (thissme < bestsme) {
@@ -2267,7 +2267,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                          int_mv *ref_mv,
                          int_mv *second_ref_mv,
                          int64_t comp_pred_diff[REFERENCE_MODES],
-                         int64_t tx_size_diff[TX_MODES],
+                         const int64_t tx_size_diff[TX_MODES],
                          int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
   MACROBLOCKD *const xd = &x->e_mbd;
 
@@ -2579,7 +2579,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
     struct buf_2d ref_yv12[2];
     int bestsme = INT_MAX;
     int sadpb = x->sadperbit16;
-    int_mv tmp_mv;
+    MV tmp_mv;
     int search_range = 3;
 
     int tmp_col_min = x->mv_col_min;
@@ -2608,20 +2608,19 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
     vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
 
     // Use mv result from single mode as mvp.
-    tmp_mv.as_int = frame_mv[refs[id]].as_int;
+    tmp_mv = frame_mv[refs[id]].as_mv;
 
-    tmp_mv.as_mv.col >>= 3;
-    tmp_mv.as_mv.row >>= 3;
+    tmp_mv.col >>= 3;
+    tmp_mv.row >>= 3;
 
     // Small-range full-pixel motion search
-    bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb,
+    bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
                                        search_range,
                                        &cpi->fn_ptr[bsize],
-                                       x->nmvjointcost, x->mvcost,
                                        &ref_mv[id].as_mv, second_pred,
                                        pw, ph);
     if (bestsme < INT_MAX)
-      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv,
+      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
                                       second_pred, &cpi->fn_ptr[bsize], 1);
 
     x->mv_col_min = tmp_col_min;
@@ -2633,7 +2632,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
       int dis; /* TODO: use dis in distortion calculation later. */
       unsigned int sse;
       bestsme = cpi->find_fractional_mv_step_comp(
-          x, &tmp_mv.as_mv,
+          x, &tmp_mv,
           &ref_mv[id].as_mv,
           cpi->common.allow_high_precision_mv,
           x->errorperbit,
@@ -2648,7 +2647,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
       xd->plane[0].pre[0] = scaled_first_yv12;
 
     if (bestsme < last_besterr[id]) {
-      frame_mv[refs[id]].as_int = tmp_mv.as_int;
+      frame_mv[refs[id]].as_mv = tmp_mv;
       last_besterr[id] = bestsme;
     } else {
       break;
@@ -2698,6 +2697,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                  int64_t *psse,
                                  const int64_t ref_best_rd) {
   VP9_COMMON *cm = &cpi->common;
+  RD_OPT *rd_opt = &cpi->rd;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   const int is_comp_pred = has_second_ref(mbmi);
@@ -2795,14 +2795,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
   // Search for best switchable filter by checking the variance of
   // pred error irrespective of whether the filter will be used
-  cpi->mask_filter_rd = 0;
+  rd_opt->mask_filter = 0;
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-    cpi->rd_filter_cache[i] = INT64_MAX;
+    rd_opt->filter_cache[i] = INT64_MAX;
 
   if (cm->interp_filter != BILINEAR) {
     *best_filter = EIGHTTAP;
-    if (x->source_variance <
-        cpi->sf.disable_filter_search_var_thresh) {
+    if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
       *best_filter = EIGHTTAP;
     } else {
       int newbest;
@@ -2818,12 +2817,12 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
         if (i > 0 && intpel_mv) {
           rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
-          cpi->rd_filter_cache[i] = rd;
-          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
-              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+          rd_opt->filter_cache[i] = rd;
+          rd_opt->filter_cache[SWITCHABLE_FILTERS] =
+              MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
           if (cm->interp_filter == SWITCHABLE)
             rd += rs_rd;
-          cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
+          rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
         } else {
           int rate_sum = 0;
           int64_t dist_sum = 0;
@@ -2843,12 +2842,12 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
           model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
 
           rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
-          cpi->rd_filter_cache[i] = rd;
-          cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
-              MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+          rd_opt->filter_cache[i] = rd;
+          rd_opt->filter_cache[SWITCHABLE_FILTERS] =
+              MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
           if (cm->interp_filter == SWITCHABLE)
             rd += rs_rd;
-          cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
+          rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
 
           if (i == 0 && intpel_mv) {
             tmp_rate_sum = rate_sum;
@@ -3125,6 +3124,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
                                   PICK_MODE_CONTEXT *ctx,
                                   int64_t best_rd_so_far) {
   VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd_opt = &cpi->rd;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
@@ -3145,7 +3145,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
   MB_MODE_INFO best_mbmode = { 0 };
-  int mode_index, best_mode_index = 0;
+  int mode_index, best_mode_index = -1;
   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
   vp9_prob comp_mode_p;
   int64_t best_intra_rd = INT64_MAX;
@@ -3164,8 +3164,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int best_skip2 = 0;
   int mode_skip_mask = 0;
   int mode_skip_start = cpi->sf.mode_skip_start + 1;
-  const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
-  const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
+  const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
+  const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
   const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
   const int intra_y_mode_mask =
       cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
@@ -3302,7 +3302,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
     // Look at the reference frame of the best mode so far and set the
     // skip mask to look at a subset of the remaining modes.
-    if (mode_index == mode_skip_start && best_rd < INT64_MAX) {
+    if (mode_index == mode_skip_start && best_mode_index >= 0) {
       switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
         case INTRA_FRAME:
           break;
@@ -3339,6 +3339,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     comp_pred = second_ref_frame > INTRA_FRAME;
     if (comp_pred) {
       if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
+          best_mode_index >= 0 &&
           vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
         continue;
       if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
@@ -3366,7 +3367,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         // one of the neighboring directional modes
         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
             (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
-          if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
+          if (best_mode_index >= 0 &&
+              vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
             continue;
         }
         if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
@@ -3376,11 +3378,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       }
     } else {
       if (x->in_active_map &&
-          !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+          !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+        const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
         if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
-                                disable_inter_mode_mask, this_mode, ref_frame,
-                                second_ref_frame))
+                                disable_inter_mode_mask, this_mode, ref_frames))
           continue;
+      }
     }
 
     mbmi->mode = this_mode;
@@ -3608,21 +3611,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
       /* keep record of best filter type */
       if (!mode_excluded && cm->interp_filter != BILINEAR) {
-        int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
+        int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
                               SWITCHABLE_FILTERS : cm->interp_filter];
 
         for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
           int64_t adj_rd;
           if (ref == INT64_MAX)
             adj_rd = 0;
-          else if (cpi->rd_filter_cache[i] == INT64_MAX)
+          else if (rd_opt->filter_cache[i] == INT64_MAX)
             // when early termination is triggered, the encoder does not have
             // access to the rate-distortion cost. it only knows that the cost
             // should be above the maximum valid value. hence it takes the known
             // maximum plus an arbitrary constant as the rate-distortion cost.
-            adj_rd = cpi->mask_filter_rd - ref + 10;
+            adj_rd = rd_opt->mask_filter - ref + 10;
           else
-            adj_rd = cpi->rd_filter_cache[i] - ref;
+            adj_rd = rd_opt->filter_cache[i] - ref;
 
           adj_rd += this_rd;
           best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
@@ -3654,7 +3657,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       break;
   }
 
-  if (best_rd >= best_rd_so_far)
+  if (best_mode_index < 0 || best_rd >= best_rd_so_far)
     return INT64_MAX;
 
   // If we used an estimate for the uv intra rd in the loop above...
@@ -3684,7 +3687,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   // combination that wins out.
   if (cpi->sf.adaptive_rd_thresh) {
     for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
-      int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index];
+      int *const fact = &rd_opt->thresh_freq_fact[bsize][mode_index];
 
       if (mode_index == best_mode_index) {
         *fact -= (*fact >> 3);
@@ -3756,6 +3759,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       int64_t best_rd_so_far) {
   VP9_COMMON *const cm = &cpi->common;
+  RD_OPT *const rd_opt = &cpi->rd;
   MACROBLOCKD *const xd = &x->e_mbd;
   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
   const struct segmentation *const seg = &cm->seg;
@@ -3768,8 +3772,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
                                     VP9_ALT_FLAG };
   int64_t best_rd = best_rd_so_far;
   int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
-  int64_t best_tx_rd[TX_MODES];
-  int64_t best_tx_diff[TX_MODES];
+  static const int64_t best_tx_diff[TX_MODES] = { 0 };
   int64_t best_pred_diff[REFERENCE_MODES];
   int64_t best_pred_rd[REFERENCE_MODES];
   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
@@ -3781,10 +3784,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t best_inter_rd = INT64_MAX;
   MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
   INTERP_FILTER tmp_best_filter = SWITCHABLE;
-  int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
-  int64_t dist_uv[TX_SIZES];
-  int skip_uv[TX_SIZES];
-  MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
+  int rate_uv_intra, rate_uv_tokenonly;
+  int64_t dist_uv;
+  int skip_uv;
+  MB_PREDICTION_MODE mode_uv = DC_PRED;
   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
   int_mv seg_mvs[4][MAX_REF_FRAMES];
   b_mode_info best_bmodes[4];
@@ -3806,12 +3809,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
   for (i = 0; i < REFERENCE_MODES; ++i)
     best_pred_rd[i] = INT64_MAX;
-  for (i = 0; i < TX_MODES; i++)
-    best_tx_rd[i] = INT64_MAX;
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
     best_filter_rd[i] = INT64_MAX;
-  for (i = 0; i < TX_SIZES; i++)
-    rate_uv_intra[i] = INT_MAX;
+  rate_uv_intra = INT_MAX;
 
   *returnrate = INT_MAX;
 
@@ -3845,7 +3845,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
     int rate2 = 0, rate_y = 0, rate_uv = 0;
     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
     int skippable = 0;
-    int64_t tx_cache[TX_MODES];
     int i;
     int this_skip2 = 0;
     int64_t total_sse = INT_MAX;
@@ -3882,9 +3881,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
     // Test best rd so far against threshold for trying this mode.
     if ((best_rd <
-         ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
-          cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
-        cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
+         ((int64_t)rd_opt->thresh_sub8x8[segment_id][bsize][mode_index] *
+          rd_opt->thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
+        rd_opt->thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
       continue;
 
     if (ref_frame > INTRA_FRAME &&
@@ -3911,10 +3910,11 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
     // TODO(jingning, jkoleszar): scaling reference frame not supported for
     // sub8x8 blocks.
-    if (ref_frame > NONE && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
+    if (ref_frame > INTRA_FRAME &&
+        vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
       continue;
 
-    if (second_ref_frame > NONE &&
+    if (second_ref_frame > INTRA_FRAME &&
         vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
       continue;
 
@@ -3968,9 +3968,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
     }
 
-    for (i = 0; i < TX_MODES; ++i)
-      tx_cache[i] = INT64_MAX;
-
 #ifdef MODE_TEST_HIT_STATS
     // TEST/DEBUG CODE
     // Keep a rcord of the number of test hits at each size
@@ -3986,21 +3983,18 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
       rate2 += intra_cost_penalty;
       distortion2 += distortion_y;
 
-      if (rate_uv_intra[TX_4X4] == INT_MAX) {
+      if (rate_uv_intra == INT_MAX) {
         choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
-                             &rate_uv_intra[TX_4X4],
-                             &rate_uv_tokenonly[TX_4X4],
-                             &dist_uv[TX_4X4], &skip_uv[TX_4X4],
-                             &mode_uv[TX_4X4]);
+                             &rate_uv_intra,
+                             &rate_uv_tokenonly,
+                             &dist_uv, &skip_uv,
+                             &mode_uv);
       }
-      rate2 += rate_uv_intra[TX_4X4];
-      rate_uv = rate_uv_tokenonly[TX_4X4];
-      distortion2 += dist_uv[TX_4X4];
-      distortion_uv = dist_uv[TX_4X4];
-      mbmi->uv_mode = mode_uv[TX_4X4];
-      tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
-      for (i = 0; i < TX_MODES; ++i)
-        tx_cache[i] = tx_cache[ONLY_4X4];
+      rate2 += rate_uv_intra;
+      rate_uv = rate_uv_tokenonly;
+      distortion2 += dist_uv;
+      distortion_uv = dist_uv;
+      mbmi->uv_mode = mode_uv;
     } else {
       int rate;
       int64_t distortion;
@@ -4019,14 +4013,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
       int uv_skippable;
 
       this_rd_thresh = (ref_frame == LAST_FRAME) ?
-          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
-          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
+          rd_opt->thresh_sub8x8[segment_id][bsize][THR_LAST] :
+          rd_opt->thresh_sub8x8[segment_id][bsize][THR_ALTR];
       this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
-          cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
-
-      cpi->mask_filter_rd = 0;
+          rd_opt->thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
+      rd_opt->mask_filter = 0;
       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
-        cpi->rd_filter_cache[i] = INT64_MAX;
+        rd_opt->filter_cache[i] = INT64_MAX;
 
       if (cm->interp_filter != BILINEAR) {
         tmp_best_filter = EIGHTTAP;
@@ -4059,14 +4052,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
               continue;
             rs = vp9_get_switchable_rate(x);
             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
-            cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
-            cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
-                MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
+            rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
+            rd_opt->filter_cache[SWITCHABLE_FILTERS] =
+                MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
                     tmp_rd + rs_rd);
             if (cm->interp_filter == SWITCHABLE)
               tmp_rd += rs_rd;
 
-            cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);
+            rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd);
 
             newbest = (tmp_rd < tmp_best_rd);
             if (newbest) {
@@ -4163,10 +4156,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
         distortion2 += distortion_uv;
         skippable = skippable && uv_skippable;
         total_sse += uv_sse;
-
-        tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
-        for (i = 0; i < TX_MODES; ++i)
-          tx_cache[i] = tx_cache[ONLY_4X4];
       }
     }
 
@@ -4304,43 +4293,26 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
     /* keep record of best filter type */
     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
         cm->interp_filter != BILINEAR) {
-      int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
+      int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
                               SWITCHABLE_FILTERS : cm->interp_filter];
       int64_t adj_rd;
       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
         if (ref == INT64_MAX)
           adj_rd = 0;
-        else if (cpi->rd_filter_cache[i] == INT64_MAX)
+        else if (rd_opt->filter_cache[i] == INT64_MAX)
           // when early termination is triggered, the encoder does not have
           // access to the rate-distortion cost. it only knows that the cost
           // should be above the maximum valid value. hence it takes the known
           // maximum plus an arbitrary constant as the rate-distortion cost.
-          adj_rd = cpi->mask_filter_rd - ref + 10;
+          adj_rd = rd_opt->mask_filter - ref + 10;
         else
-          adj_rd = cpi->rd_filter_cache[i] - ref;
+          adj_rd = rd_opt->filter_cache[i] - ref;
 
         adj_rd += this_rd;
         best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
       }
     }
 
-    /* keep record of best txfm size */
-    tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
-    tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
-    tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
-    if (!mode_excluded && this_rd != INT64_MAX) {
-      for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
-        int64_t adj_rd = INT64_MAX;
-        if (ref_frame > INTRA_FRAME)
-          adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
-        else
-          adj_rd = this_rd;
-
-        if (adj_rd < best_tx_rd[i])
-          best_tx_rd[i] = adj_rd;
-      }
-    }
-
     if (early_term)
       break;
 
@@ -4355,14 +4327,12 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
   if (cpi->sf.use_uv_intra_rd_estimate) {
     // Do Intra UV best rd mode selection if best mode choice above was intra.
     if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
-      TX_SIZE uv_tx_size;
       *mbmi = best_mbmode;
-      uv_tx_size = get_uv_tx_size(mbmi);
-      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
-                              &rate_uv_tokenonly[uv_tx_size],
-                              &dist_uv[uv_tx_size],
-                              &skip_uv[uv_tx_size],
-                              BLOCK_8X8, uv_tx_size);
+      rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
+                              &rate_uv_tokenonly,
+                              &dist_uv,
+                              &skip_uv,
+                              BLOCK_8X8, TX_4X4);
     }
   }
 
@@ -4383,7 +4353,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
   // combination that wins out.
   if (cpi->sf.adaptive_rd_thresh) {
     for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
-      int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index];
+      int *const fact = &rd_opt->thresh_freq_sub8x8[bsize][mode_index];
 
       if (mode_index == best_mode_index) {
         *fact -= (*fact >> 3);
@@ -4424,15 +4394,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
     }
     if (cm->interp_filter == SWITCHABLE)
       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
-    for (i = 0; i < TX_MODES; i++) {
-      if (best_tx_rd[i] == INT64_MAX)
-        best_tx_diff[i] = 0;
-      else
-        best_tx_diff[i] = best_rd - best_tx_rd[i];
-    }
   } else {
     vp9_zero(best_filter_diff);
-    vp9_zero(best_tx_diff);
   }
 
   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index d6b6174fa..f0bd8a147 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -13,32 +13,39 @@
 #include "vp9/encoder/vp9_onyx_int.h"
 #include "vp9/encoder/vp9_speed_features.h"
 
-#define ALL_INTRA_MODES ((1 << DC_PRED) | \
-                         (1 << V_PRED) | (1 << H_PRED) | \
-                         (1 << D45_PRED) | (1 << D135_PRED) | \
-                         (1 << D117_PRED) | (1 << D153_PRED) | \
-                         (1 << D207_PRED) | (1 << D63_PRED) | \
-                         (1 << TM_PRED))
-#define INTRA_DC_ONLY   (1 << DC_PRED)
-#define INTRA_DC_TM     ((1 << TM_PRED) | (1 << DC_PRED))
-#define INTRA_DC_H_V    ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED))
-#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED))
-
-// Masks for partially or completely disabling split mode
-#define DISABLE_ALL_INTER_SPLIT   ((1 << THR_COMP_GA) | \
-                                   (1 << THR_COMP_LA) | \
-                                   (1 << THR_ALTR) | \
-                                   (1 << THR_GOLD) | \
-                                   (1 << THR_LAST))
-
-#define DISABLE_ALL_SPLIT         ((1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT)
-
-#define DISABLE_COMPOUND_SPLIT    ((1 << THR_COMP_GA) | (1 << THR_COMP_LA))
-
-#define LAST_AND_INTRA_SPLIT_ONLY ((1 << THR_COMP_GA) | \
-                                   (1 << THR_COMP_LA) | \
-                                   (1 << THR_ALTR) | \
-                                   (1 << THR_GOLD))
+enum {  // Bit masks built from (1 << intra prediction mode)
+  ALL_INTRA_MODES = (1 << DC_PRED) |
+                    (1 << V_PRED) | (1 << H_PRED) |
+                    (1 << D45_PRED) | (1 << D135_PRED) |
+                    (1 << D117_PRED) | (1 << D153_PRED) |
+                    (1 << D207_PRED) | (1 << D63_PRED) |
+                    (1 << TM_PRED),
+
+  INTRA_DC_ONLY   = (1 << DC_PRED),
+
+  INTRA_DC_TM     = (1 << TM_PRED) | (1 << DC_PRED),
+
+  INTRA_DC_H_V    = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
+
+  INTRA_DC_TM_H_V = INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)
+};
+
+enum {  // Masks for partially or completely disabling split mode
+  DISABLE_ALL_INTER_SPLIT   = (1 << THR_COMP_GA) |
+                              (1 << THR_COMP_LA) |
+                              (1 << THR_ALTR) |
+                              (1 << THR_GOLD) |
+                              (1 << THR_LAST),
+
+  DISABLE_ALL_SPLIT         = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
+
+  DISABLE_COMPOUND_SPLIT    = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
+
+  LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) |
+                              (1 << THR_COMP_LA) |
+                              (1 << THR_ALTR) |
+                              (1 << THR_GOLD)
+};
 
 static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
                                    SPEED_FEATURES *sf, int speed) {
@@ -49,8 +56,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
   if (speed >= 1) {
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check  = 1;
-    sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD
-                                                          : USE_LARGESTALL;
+    sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
+                                                      : USE_LARGESTALL;
 
     if (MIN(cm->width, cm->height) >= 720)
       sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -73,8 +80,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
   }
 
   if (speed >= 2) {
-    sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD
-                                                          : USE_LARGESTALL;
+    sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
+                                                        : USE_LARGESTALL;
 
     if (MIN(cm->width, cm->height) >= 720)
       sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -269,9 +276,6 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
     sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
     sf->search_type_check_frequency = 50;
     sf->source_var_thresh = 360;
-
-    sf->use_nonrd_pick_mode = 1;
-    sf->search_method = FAST_DIAMOND;
   }
 
   if (speed >= 7) {
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 72f548a04..55422979a 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -344,7 +344,7 @@ typedef struct {
   int search_type_check_frequency;
 
   // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
-  int source_var_thresh;
+  unsigned int source_var_thresh;
 } SPEED_FEATURES;
 
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index c2b6263f0..9b3fc6eab 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -178,7 +178,6 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
   cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
   cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
   cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size;
-  cpi->output_framerate = lc->framerate;
   // Reset the frames_since_key and frames_to_key counters to their values
   // before the layer restore. Keep these defined for the stream (not layer).
   if (cpi->svc.number_temporal_layers > 1) {
@@ -197,7 +196,6 @@ void vp9_save_layer_context(VP9_COMP *const cpi) {
   lc->starting_buffer_level = oxcf->starting_buffer_level;
   lc->optimal_buffer_level = oxcf->optimal_buffer_level;
   lc->maximum_buffer_size = oxcf->maximum_buffer_size;
-  lc->framerate = cpi->output_framerate;
 }
 
 void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 62e20dc00..c9e39a1a2 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -45,12 +45,6 @@ typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr,
                                    int  ref_stride,
                                    unsigned int *sad_array);
 
-typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr,
-                                    int source_stride,
-                                    const uint8_t *ref_ptr,
-                                    int  ref_stride,
-                                    unsigned int *sad_array);
-
 typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr,
                                      int source_stride,
                                      const uint8_t* const ref_ptr[],
@@ -96,7 +90,7 @@ typedef struct vp9_variance_vtable {
   vp9_variance_fn_t          svf_halfpix_v;
   vp9_variance_fn_t          svf_halfpix_hv;
   vp9_sad_multi_fn_t         sdx3f;
-  vp9_sad_multi1_fn_t        sdx8f;
+  vp9_sad_multi_fn_t         sdx8f;
   vp9_sad_multi_d_fn_t       sdx4df;
 } vp9_variance_fn_ptr_t;
 
diff --git a/vp9/encoder/x86/vp9_mcomp_x86.h b/vp9/encoder/x86/vp9_mcomp_x86.h
deleted file mode 100644
index c15039ad8..000000000
--- a/vp9/encoder/x86/vp9_mcomp_x86.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_X86_VP9_MCOMP_X86_H_
-#define VP9_ENCODER_X86_VP9_MCOMP_X86_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_SSE3
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef  vp9_search_full_search
-#define vp9_search_full_search vp9_full_search_sadx3
-
-#undef  vp9_search_refining_search
-#define vp9_search_refining_search vp9_refining_search_sadx4
-
-#undef  vp9_search_diamond_search
-#define vp9_search_diamond_search vp9_diamond_search_sadx4
-
-#endif
-#endif
-
-#if HAVE_SSE4_1
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef  vp9_search_full_search
-#define vp9_search_full_search vp9_full_search_sadx8
-
-#endif
-#endif
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VP9_ENCODER_X86_VP9_MCOMP_X86_H_
-
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 0623ad132..967431c0f 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -168,6 +168,11 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
   RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100);
   RANGE_CHECK(cfg,        g_pass,         VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
 
+  if (cfg->rc_resize_allowed == 1) {
+    RANGE_CHECK(cfg, rc_scaled_width, 1, cfg->g_w);
+    RANGE_CHECK(cfg, rc_scaled_height, 1, cfg->g_h);
+  }
+
   RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS);
   RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
   if (cfg->ts_number_layers > 1) {
@@ -332,6 +337,10 @@ static vpx_codec_err_t set_encoder_config(
   oxcf->under_shoot_pct         = cfg->rc_undershoot_pct;
   oxcf->over_shoot_pct          = cfg->rc_overshoot_pct;
 
+  oxcf->allow_spatial_resampling = cfg->rc_resize_allowed;
+  oxcf->scaled_frame_width       = cfg->rc_scaled_width;
+  oxcf->scaled_frame_height      = cfg->rc_scaled_height;
+
   oxcf->maximum_buffer_size     = cfg->rc_buf_sz;
   oxcf->starting_buffer_level   = cfg->rc_buf_initial_sz;
   oxcf->optimal_buffer_level    = cfg->rc_buf_optimal_sz;
@@ -410,6 +419,9 @@ static vpx_codec_err_t set_encoder_config(
   printf("fixed_q: %d\n",  oxcf->fixed_q);
   printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
   printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
+  printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling);
+  printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width);
+  printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height);
   printf("two_pass_vbrbias: %d\n",  oxcf->two_pass_vbrbias);
   printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
   printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
@@ -1128,6 +1140,8 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
 
       0,                  // rc_dropframe_thresh
       0,                  // rc_resize_allowed
+      1,                  // rc_scaled_width
+      1,                  // rc_scaled_height
       60,                 // rc_resize_down_thresold
       30,                 // rc_resize_up_thresold
 
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index da6c0f8b6..24b8d9de1 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -87,8 +87,6 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h
 VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c
 VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
 
-
-VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_mcomp_x86.h
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
 VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm