35 files changed, 580 insertions, 479 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c
index e033fbb99..6f771992b 100644
--- a/vp9/common/vp9_alloccommon.c
+++ b/vp9/common/vp9_alloccommon.c
@@ -33,9 +33,16 @@ void vp9_update_mode_info_border(VP9_COMMON *cm, MODE_INFO *mi) {
 void vp9_free_frame_buffers(VP9_COMMON *cm) {
   int i;
 
-  for (i = 0; i < FRAME_BUFFERS; i++)
+  for (i = 0; i < FRAME_BUFFERS; i++) {
     vp9_free_frame_buffer(&cm->frame_bufs[i].buf);
 
+    if (cm->frame_bufs[i].ref_count > 0 &&
+        cm->frame_bufs[i].raw_frame_buffer.data != NULL) {
+      cm->release_fb_cb(cm->cb_priv, &cm->frame_bufs[i].raw_frame_buffer);
+      cm->frame_bufs[i].ref_count = 0;
+    }
+  }
+
   vp9_free_frame_buffer(&cm->post_proc_buffer);
 
   vpx_free(cm->mip);
@@ -85,7 +92,7 @@ int vp9_resize_frame_buffers(VP9_COMMON *cm, int width, int height) {
   int mi_size;
 
   if (vp9_realloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y,
-                               VP9_DEC_BORDER_IN_PIXELS) < 0)
+                               VP9_DEC_BORDER_IN_PIXELS, NULL, NULL, NULL) < 0)
     goto fail;
 
   set_mb_mi(cm, aligned_width, aligned_height);
@@ -199,6 +206,7 @@ void vp9_create_common(VP9_COMMON *cm) {
 
 void vp9_remove_common(VP9_COMMON *cm) {
   vp9_free_frame_buffers(cm);
+  vp9_free_internal_frame_buffers(&cm->int_frame_buffers);
 }
 
 void vp9_initialize_common() {
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 70b8ffa4e..f10a3c8c7 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -182,7 +182,7 @@ struct macroblockd_plane {
   int subsampling_y;
   struct buf_2d dst;
   struct buf_2d pre[2];
-  int16_t *dequant;
+  const int16_t *dequant;
   ENTROPY_CONTEXT *above_context;
   ENTROPY_CONTEXT *left_context;
 };
diff --git a/vp9/common/vp9_convolve.c b/vp9/common/vp9_convolve.c
index 3807ccc87..d30e0b488 100644
--- a/vp9/common/vp9_convolve.c
+++ b/vp9/common/vp9_convolve.c
@@ -145,7 +145,7 @@ static const InterpKernel *get_filter_base(const int16_t *filter) {
 }
 
 static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
-  return (const InterpKernel *)(intptr_t)f - base;
+  return (int)((const InterpKernel *)(intptr_t)f - base);
 }
 
 void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index e030d92ec..d6b380fd5 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -119,7 +119,7 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
 extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_8x8plus[1024]);
 extern DECLARE_ALIGNED(16, const uint8_t, vp9_coefband_trans_4x4[16]);
 
-static const uint8_t *get_band_translate(TX_SIZE tx_size) {
+static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
   return tx_size == TX_4X4 ? vp9_coefband_trans_4x4
                            : vp9_coefband_trans_8x8plus;
 }
@@ -146,8 +146,8 @@ typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS]
 
 void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full);
 
-static int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
-                                                const ENTROPY_CONTEXT *l) {
+static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
+                                      const ENTROPY_CONTEXT *l) {
   ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
 
   switch (tx_size) {
@@ -174,8 +174,8 @@ static int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
   return combine_entropy_contexts(above_ec, left_ec);
 }
 
-static const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
-                                  PLANE_TYPE type, int block_idx) {
+static const INLINE scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
+                                         PLANE_TYPE type, int block_idx) {
   const MODE_INFO *const mi = xd->mi_8x8[0];
   const MB_MODE_INFO *const mbmi = &mi->mbmi;
 
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 6def3c869..25cba7fbe 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -345,7 +345,7 @@ static int adapt_prob(vp9_prob pre_prob, const unsigned int ct[2]) {
 static void adapt_probs(const vp9_tree_index *tree,
                         const vp9_prob *pre_probs, const unsigned int *counts,
                         vp9_prob *probs) {
-  tree_merge_probs(tree, pre_probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR,
+  vp9_tree_merge_probs(tree, pre_probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR,
                    probs);
 }
 
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 60ae79fdc..e1f5ef7b4 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -192,8 +192,8 @@ static vp9_prob adapt_prob(vp9_prob prep, const unsigned int ct[2]) {
 
 static void adapt_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
                         const unsigned int *counts, vp9_prob *probs) {
-  tree_merge_probs(tree, pre_probs, counts, MV_COUNT_SAT, MV_MAX_UPDATE_FACTOR,
-                   probs);
+  vp9_tree_merge_probs(tree, pre_probs, counts, MV_COUNT_SAT,
+                       MV_MAX_UPDATE_FACTOR, probs);
 }
 
 void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
diff --git a/vp9/common/vp9_frame_buffers.c b/vp9/common/vp9_frame_buffers.c
new file mode 100644
index 000000000..d903ed695
--- /dev/null
+++ b/vp9/common/vp9_frame_buffers.c
@@ -0,0 +1,84 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "vp9/common/vp9_frame_buffers.h"
+#include "vpx_mem/vpx_mem.h"
+
+int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) {
+  assert(list != NULL);
+  vp9_free_internal_frame_buffers(list);
+
+  list->num_internal_frame_buffers =
+      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  list->int_fb = vpx_calloc(list->num_internal_frame_buffers,
+                            sizeof(*list->int_fb));
+  return (list->int_fb == NULL);
+}
+
+void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) {
+  int i;
+
+  assert(list != NULL);
+
+  for (i = 0; i < list->num_internal_frame_buffers; ++i) {
+    vpx_free(list->int_fb[i].data);
+    list->int_fb[i].data = NULL;
+  }
+  vpx_free(list->int_fb);
+  list->int_fb = NULL;
+}
+
+int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
+                         vpx_codec_frame_buffer_t *fb) {
+  int i;
+  InternalFrameBufferList *const int_fb_list =
+      (InternalFrameBufferList *)cb_priv;
+  if (int_fb_list == NULL || fb == NULL)
+    return -1;
+
+  // Find a free frame buffer.
+  for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) {
+    if (!int_fb_list->int_fb[i].in_use)
+      break;
+  }
+
+  if (i == int_fb_list->num_internal_frame_buffers)
+    return -1;
+
+  if (int_fb_list->int_fb[i].size < min_size) {
+    int_fb_list->int_fb[i].data =
+        (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size);
+    if (!int_fb_list->int_fb[i].data)
+      return -1;
+
+    int_fb_list->int_fb[i].size = min_size;
+  }
+
+  fb->data = int_fb_list->int_fb[i].data;
+  fb->size = int_fb_list->int_fb[i].size;
+  int_fb_list->int_fb[i].in_use = 1;
+
+  // Set the frame buffer's private data to point at the internal frame buffer.
+  fb->priv = &int_fb_list->int_fb[i];
+  return 0;
+}
+
+int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) {
+  InternalFrameBuffer *int_fb;
+  (void)cb_priv;
+  if (fb == NULL)
+    return -1;
+
+  int_fb = (InternalFrameBuffer *)fb->priv;
+  int_fb->in_use = 0;
+  return 0;
+}
diff --git a/vp9/common/vp9_frame_buffers.h b/vp9/common/vp9_frame_buffers.h
new file mode 100644
index 000000000..e2cfe61b6
--- /dev/null
+++ b/vp9/common/vp9_frame_buffers.h
@@ -0,0 +1,53 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_
+#define VP9_COMMON_VP9_FRAME_BUFFERS_H_
+
+#include "vpx/vpx_frame_buffer.h"
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct InternalFrameBuffer {
+  uint8_t *data;
+  size_t size;
+  int in_use;
+} InternalFrameBuffer;
+
+typedef struct InternalFrameBufferList {
+  int num_internal_frame_buffers;
+  InternalFrameBuffer *int_fb;
+} InternalFrameBufferList;
+
+// Initializes |list|. Returns 0 on success.
+int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Free any data allocated to the frame buffers.
+void vp9_free_internal_frame_buffers(InternalFrameBufferList *list);
+
+// Callback used by libvpx to request an external frame buffer. |cb_priv|
+// Callback private data, which points to an InternalFrameBufferList.
+// |min_size| is the minimum size in bytes needed to decode the next frame.
+// |fb| pointer to the frame buffer.
+int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
+                         vpx_codec_frame_buffer_t *fb);
+
+// Callback used by libvpx when there are no references to the frame buffer.
+// |cb_priv| is not used. |fb| pointer to the frame buffer.
+int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_COMMON_VP9_FRAME_BUFFERS_H_
diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h
index 98fd1d82f..3eb7f9d61 100644
--- a/vp9/common/vp9_mv.h
+++ b/vp9/common/vp9_mv.h
@@ -34,8 +34,8 @@ typedef struct mv32 {
   int32_t col;
 } MV32;
 
-static void clamp_mv(MV *mv, int min_col, int max_col,
-                             int min_row, int max_row) {
+static INLINE void clamp_mv(MV *mv, int min_col, int max_col,
+                            int min_row, int max_row) {
   mv->col = clamp(mv->col, min_col, max_col);
   mv->row = clamp(mv->row, min_row, max_row);
 }
diff --git a/vp9/common/vp9_mvref_common.h b/vp9/common/vp9_mvref_common.h
index 0936abfcd..f99952f3c 100644
--- a/vp9/common/vp9_mvref_common.h
+++ b/vp9/common/vp9_mvref_common.h
@@ -48,7 +48,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
                            int_mv *mvlist, int_mv *nearest, int_mv *near);
 
 // TODO(jingning): this mv clamping function should be block size dependent.
-static void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
+static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
   clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
                xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
                xd->mb_to_top_edge - LEFT_TOP_MARGIN,
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index d92a25b12..97983c596 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -18,6 +18,7 @@
 #include "vp9/common/vp9_entropymv.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_frame_buffers.h"
 #include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_tile_common.h"
 
@@ -94,6 +95,7 @@ typedef enum {
 
 typedef struct {
   int ref_count;
+  vpx_codec_frame_buffer_t raw_frame_buffer;
   YV12_BUFFER_CONFIG buf;
 } RefCntBuffer;
 
@@ -223,13 +225,21 @@ typedef struct VP9Common {
   int frame_parallel_decoding_mode;
 
   int log2_tile_cols, log2_tile_rows;
+
+  // Private data associated with the frame buffer callbacks.
+  void *cb_priv;
+  vpx_get_frame_buffer_cb_fn_t get_fb_cb;
+  vpx_release_frame_buffer_cb_fn_t release_fb_cb;
+
+  // Handles memory for the codec.
+  InternalFrameBufferList int_frame_buffers;
 } VP9_COMMON;
 
-static YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
+static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) {
   return &cm->frame_bufs[cm->new_fb_idx].buf;
 }
 
-static int get_free_fb(VP9_COMMON *cm) {
+static INLINE int get_free_fb(VP9_COMMON *cm) {
   int i;
   for (i = 0; i < FRAME_BUFFERS; i++)
     if (cm->frame_bufs[i].ref_count == 0)
@@ -240,7 +250,7 @@ static int get_free_fb(VP9_COMMON *cm) {
   return i;
 }
 
-static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
+static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
   const int ref_index = *idx;
 
   if (ref_index >= 0 && bufs[ref_index].ref_count > 0)
@@ -251,7 +261,7 @@ static void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) {
   bufs[new_idx].ref_count++;
 }
 
-static int mi_cols_aligned_to_sb(int n_mis) {
+static INLINE int mi_cols_aligned_to_sb(int n_mis) {
   return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2);
 }
 
@@ -275,10 +285,10 @@ static INLINE void set_skip_context(
   }
 }
 
-static void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
-                           int mi_row, int bh,
-                           int mi_col, int bw,
-                           int mi_rows, int mi_cols) {
+static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
+                                  int mi_row, int bh,
+                                  int mi_col, int bw,
+                                  int mi_rows, int mi_cols) {
   xd->mb_to_top_edge    = -((mi_row * MI_SIZE) * 8);
   xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8;
   xd->mb_to_left_edge   = -((mi_col * MI_SIZE) * 8);
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 0acee32f8..33ae5a896 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -98,8 +98,8 @@ static INLINE vp9_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
 
 int vp9_get_tx_size_context(const MACROBLOCKD *xd);
 
-static const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
-                                    const struct tx_probs *tx_probs) {
+static INLINE const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
+                                           const struct tx_probs *tx_probs) {
   switch (max_tx_size) {
     case TX_8X8:
       return tx_probs->p8x8[ctx];
@@ -113,13 +113,14 @@ static const vp9_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
   }
 }
 
-static const vp9_prob *get_tx_probs2(TX_SIZE max_tx_size, const MACROBLOCKD *xd,
-                                     const struct tx_probs *tx_probs) {
+static INLINE const vp9_prob *get_tx_probs2(TX_SIZE max_tx_size,
+                                            const MACROBLOCKD *xd,
+                                            const struct tx_probs *tx_probs) {
   return get_tx_probs(max_tx_size, vp9_get_tx_size_context(xd), tx_probs);
 }
 
-static unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
-                                   struct tx_counts *tx_counts) {
+static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
+                                          struct tx_counts *tx_counts) {
   switch (max_tx_size) {
     case TX_8X8:
       return tx_counts->p8x8[ctx];
diff --git a/vp9/common/vp9_prob.c b/vp9/common/vp9_prob.c
index 884884e0b..f9bc06ecf 100644
--- a/vp9/common/vp9_prob.c
+++ b/vp9/common/vp9_prob.c
@@ -28,3 +28,34 @@ DECLARE_ALIGNED(16, const uint8_t, vp9_norm[256]) = {
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
+
+
+static unsigned int tree_merge_probs_impl(unsigned int i,
+                                          const vp9_tree_index *tree,
+                                          const vp9_prob *pre_probs,
+                                          const unsigned int *counts,
+                                          unsigned int count_sat,
+                                          unsigned int max_update,
+                                          vp9_prob *probs) {
+  const int l = tree[i];
+  const unsigned int left_count = (l <= 0)
+                 ? counts[-l]
+                 : tree_merge_probs_impl(l, tree, pre_probs, counts,
+                                         count_sat, max_update, probs);
+  const int r = tree[i + 1];
+  const unsigned int right_count = (r <= 0)
+                 ? counts[-r]
+                 : tree_merge_probs_impl(r, tree, pre_probs, counts,
+                                         count_sat, max_update, probs);
+  const unsigned int ct[2] = { left_count, right_count };
+  probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct,
+                              count_sat, max_update);
+  return left_count + right_count;
+}
+
+void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
+                          const unsigned int *counts, unsigned int count_sat,
+                          unsigned int max_update_factor, vp9_prob *probs) {
+  tree_merge_probs_impl(0, tree, pre_probs, counts, count_sat,
+                        max_update_factor, probs);
+}
diff --git a/vp9/common/vp9_prob.h b/vp9/common/vp9_prob.h
index cc8d8ab38..f36148035 100644
--- a/vp9/common/vp9_prob.h
+++ b/vp9/common/vp9_prob.h
@@ -79,37 +79,10 @@ static INLINE vp9_prob merge_probs(vp9_prob pre_prob,
   return weighted_prob(pre_prob, prob, factor);
 }
 
-static unsigned int tree_merge_probs_impl(unsigned int i,
-                                          const vp9_tree_index *tree,
-                                          const vp9_prob *pre_probs,
-                                          const unsigned int *counts,
-                                          unsigned int count_sat,
-                                          unsigned int max_update_factor,
-                                          vp9_prob *probs) {
-  const int l = tree[i];
-  const unsigned int left_count = (l <= 0)
-                 ? counts[-l]
-                 : tree_merge_probs_impl(l, tree, pre_probs, counts,
-                                         count_sat, max_update_factor, probs);
-  const int r = tree[i + 1];
-  const unsigned int right_count = (r <= 0)
-                 ? counts[-r]
-                 : tree_merge_probs_impl(r, tree, pre_probs, counts,
-                                         count_sat, max_update_factor, probs);
-  const unsigned int ct[2] = { left_count, right_count };
-  probs[i >> 1] = merge_probs(pre_probs[i >> 1], ct,
-                              count_sat, max_update_factor);
-  return left_count + right_count;
-}
+void vp9_tree_merge_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
+                          const unsigned int *counts, unsigned int count_sat,
+                          unsigned int max_update_factor, vp9_prob *probs);
 
-static void tree_merge_probs(const vp9_tree_index *tree,
-                             const vp9_prob *pre_probs,
-                             const unsigned int *counts,
-                             unsigned int count_sat,
-                             unsigned int max_update_factor, vp9_prob *probs) {
-  tree_merge_probs_impl(0, tree, pre_probs, counts,
-                        count_sat, max_update_factor, probs);
-}
 
 DECLARE_ALIGNED(16, extern const uint8_t, vp9_norm[256]);
 
diff --git a/vp9/common/vp9_quant_common.c b/vp9/common/vp9_quant_common.c
index 6dbdb4216..9fef8b1ef 100644
--- a/vp9/common/vp9_quant_common.c
+++ b/vp9/common/vp9_quant_common.c
@@ -130,7 +130,8 @@ int16_t vp9_ac_quant(int qindex, int delta) {
 }
 
 
-int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex) {
+int vp9_get_qindex(const struct segmentation *seg, int segment_id,
+                   int base_qindex) {
   if (vp9_segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
     const int data = vp9_get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
     return seg->abs_delta == SEGMENT_ABSDATA ?
diff --git a/vp9/common/vp9_quant_common.h b/vp9/common/vp9_quant_common.h
index af50e23cd..581104006 100644
--- a/vp9/common/vp9_quant_common.h
+++ b/vp9/common/vp9_quant_common.h
@@ -27,7 +27,8 @@ void vp9_init_quant_tables();
 int16_t vp9_dc_quant(int qindex, int delta);
 int16_t vp9_ac_quant(int qindex, int delta);
 
-int vp9_get_qindex(struct segmentation *seg, int segment_id, int base_qindex);
+int vp9_get_qindex(const struct segmentation *seg, int segment_id,
+                   int base_qindex);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index bf738c28b..dccd60938 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -39,18 +39,18 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
                                enum mv_precision precision,
                                int x, int y);
 
-static int scaled_buffer_offset(int x_offset, int y_offset, int stride,
-                                const struct scale_factors *sf) {
+static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride,
+                                       const struct scale_factors *sf) {
   const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset;
   const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset;
   return y * stride + x;
 }
 
-static void setup_pred_plane(struct buf_2d *dst,
-                             uint8_t *src, int stride,
-                             int mi_row, int mi_col,
-                             const struct scale_factors *scale,
-                             int subsampling_x, int subsampling_y) {
+static INLINE void setup_pred_plane(struct buf_2d *dst,
+                                    uint8_t *src, int stride,
+                                    int mi_row, int mi_col,
+                                    const struct scale_factors *scale,
+                                    int subsampling_x, int subsampling_y) {
   const int x = (MI_SIZE * mi_col) >> subsampling_x;
   const int y = (MI_SIZE * mi_row) >> subsampling_y;
   dst->buf = src + scaled_buffer_offset(x, y, stride, scale);
diff --git a/vp9/common/vp9_scale.h b/vp9/common/vp9_scale.h
index 90b0d0bf9..a9dda1889 100644
--- a/vp9/common/vp9_scale.h
+++ b/vp9/common/vp9_scale.h
@@ -40,12 +40,12 @@ void vp9_setup_scale_factors_for_frame(struct scale_factors *sf,
                                        int other_w, int other_h,
                                        int this_w, int this_h);
 
-static int vp9_is_valid_scale(const struct scale_factors *sf) {
+static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) {
   return sf->x_scale_fp != REF_INVALID_SCALE &&
          sf->y_scale_fp != REF_INVALID_SCALE;
 }
 
-static int vp9_is_scaled(const struct scale_factors *sf) {
+static INLINE int vp9_is_scaled(const struct scale_factors *sf) {
   return sf->x_scale_fp != REF_NO_SCALE ||
          sf->y_scale_fp != REF_NO_SCALE;
 }
diff --git a/vp9/common/vp9_systemdependent.h b/vp9/common/vp9_systemdependent.h
index fb8626ce5..7455abce3 100644
--- a/vp9/common/vp9_systemdependent.h
+++ b/vp9/common/vp9_systemdependent.h
@@ -34,7 +34,7 @@ void vpx_reset_mmx_state(void);
 
 #if defined(_MSC_VER) && _MSC_VER < 1800
 // round is not defined in MSVC before VS2013.
-static int round(double x) {
+static INLINE int round(double x) {
   if (x < 0)
     return (int)ceil(x - 0.5);
   else
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 93ef7503f..d37afa5bc 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -691,9 +691,14 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) {
     vp9_update_frame_size(cm);
   }
 
-  vp9_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height,
-                           cm->subsampling_x, cm->subsampling_y,
-                           VP9_DEC_BORDER_IN_PIXELS);
+  if (vp9_realloc_frame_buffer(
+          get_frame_new_buffer(cm), cm->width, cm->height,
+          cm->subsampling_x, cm->subsampling_y, VP9_DEC_BORDER_IN_PIXELS,
+          &cm->frame_bufs[cm->new_fb_idx].raw_frame_buffer, cm->get_fb_cb,
+          cm->cb_priv)) {
+    vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                       "Failed to allocate frame buffer");
+  }
 }
 
 static void setup_frame_size(VP9D_COMP *pbi,
@@ -1114,7 +1119,7 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi,
   cm->show_existing_frame = vp9_rb_read_bit(rb);
   if (cm->show_existing_frame) {
     // Show an existing frame directly.
-    int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
+    const int frame_to_show = cm->ref_frame_map[vp9_rb_read_literal(rb, 3)];
     ref_cnt_fb(cm->frame_bufs, &cm->new_fb_idx, frame_to_show);
     pbi->refresh_frame_flags = 0;
     cm->lf.filter_level = 0;
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 128b9f8af..542732aa0 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -220,11 +220,13 @@ void vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows,
 
   CHECK_MEM_ERROR(cm, lf_sync->mutex_,
                   vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
+  for (i = 0; i < rows; ++i) {
+    pthread_mutex_init(&lf_sync->mutex_[i], NULL);
+  }
+
   CHECK_MEM_ERROR(cm, lf_sync->cond_,
                   vpx_malloc(sizeof(*lf_sync->cond_) * rows));
-
   for (i = 0; i < rows; ++i) {
-    pthread_mutex_init(&lf_sync->mutex_[i], NULL);
     pthread_cond_init(&lf_sync->cond_[i], NULL);
   }
 #endif  // CONFIG_MULTITHREAD
@@ -242,18 +244,29 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
   if (lf_sync != NULL) {
     int i;
 
-    for (i = 0; i < rows; ++i) {
-      pthread_mutex_destroy(&lf_sync->mutex_[i]);
-      pthread_cond_destroy(&lf_sync->cond_[i]);
+    if (lf_sync->mutex_ != NULL) {
+      for (i = 0; i < rows; ++i) {
+        pthread_mutex_destroy(&lf_sync->mutex_[i]);
+      }
+      vpx_free(lf_sync->mutex_);
+    }
+    if (lf_sync->cond_ != NULL) {
+      for (i = 0; i < rows; ++i) {
+        pthread_cond_destroy(&lf_sync->cond_[i]);
+      }
+      vpx_free(lf_sync->cond_);
     }
 
-    vpx_free(lf_sync->mutex_);
-    vpx_free(lf_sync->cond_);
     vpx_free(lf_sync->cur_sb_col);
+    // clear the structure as the source of this call may be a resize in which
+    // case this call will be followed by an _alloc() which may fail.
+    vpx_memset(lf_sync, 0, sizeof(*lf_sync));
   }
 #else
   (void)rows;
-  if (lf_sync != NULL)
+  if (lf_sync != NULL) {
     vpx_free(lf_sync->cur_sb_col);
+    vpx_memset(lf_sync, 0, sizeof(*lf_sync));
+  }
 #endif  // CONFIG_MULTITHREAD
 }
diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c
index 803d536ba..fd3488355 100644
--- a/vp9/decoder/vp9_onyxd_if.c
+++ b/vp9/decoder/vp9_onyxd_if.c
@@ -290,9 +290,14 @@ static void swap_frame_buffers(VP9D_COMP *pbi) {
   VP9_COMMON *const cm = &pbi->common;
 
   for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
-    if (mask & 1)
+    if (mask & 1) {
+      const int old_idx = cm->ref_frame_map[ref_index];
       ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[ref_index],
                  cm->new_fb_idx);
+      if (old_idx >= 0 && cm->frame_bufs[old_idx].ref_count == 0)
+        cm->release_fb_cb(cm->cb_priv,
+                          &cm->frame_bufs[old_idx].raw_frame_buffer);
+    }
     ++ref_index;
   }
 
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index dc81c0d9e..dfb69ca53 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -319,7 +319,7 @@ static void build_activity_map(VP9_COMP *cpi) {
 }
 
 // Macroblock activity masking
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
+static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
 #if USE_ACT_INDEX
   x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
   x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
@@ -673,7 +673,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
   }
 
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
-    vp9_activity_masking(cpi, x);
+    activity_masking(cpi, x);
 
   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
     vp9_clear_system_state();  // __asm emms;
@@ -713,36 +713,40 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
 
 static void update_stats(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MODE_INFO *mi = xd->mi_8x8[0];
-  MB_MODE_INFO *const mbmi = &mi->mbmi;
+  const MACROBLOCK *const x = &cpi->mb;
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const MODE_INFO *const mi = xd->mi_8x8[0];
+  const MB_MODE_INFO *const mbmi = &mi->mbmi;
 
   if (!frame_is_intra_only(cm)) {
     const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
                                                      SEG_LVL_REF_FRAME);
+    if (!seg_ref_active) {
+      FRAME_COUNTS *const counts = &cm->counts;
+      const int inter_block = is_inter_block(mbmi);
 
-    if (!seg_ref_active)
-      cm->counts.intra_inter[vp9_get_intra_inter_context(xd)]
-                            [is_inter_block(mbmi)]++;
-
-    // If the segment reference feature is enabled we have only a single
-    // reference frame allowed for the segment so exclude it from
-    // the reference frame counts used to work out probabilities.
-    if (is_inter_block(mbmi) && !seg_ref_active) {
-      if (cm->reference_mode == REFERENCE_MODE_SELECT)
-        cm->counts.comp_inter[vp9_get_reference_mode_context(cm, xd)]
-                             [has_second_ref(mbmi)]++;
-
-      if (has_second_ref(mbmi)) {
-        cm->counts.comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
-                           [mbmi->ref_frame[0] == GOLDEN_FRAME]++;
-      } else {
-        cm->counts.single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
-                             [mbmi->ref_frame[0] != LAST_FRAME]++;
-        if (mbmi->ref_frame[0] != LAST_FRAME)
-          cm->counts.single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
-                               [mbmi->ref_frame[0] != GOLDEN_FRAME]++;
+      counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++;
+
+      // If the segment reference feature is enabled we have only a single
+      // reference frame allowed for the segment so exclude it from
+      // the reference frame counts used to work out probabilities.
+      if (inter_block) {
+        const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
+
+        if (cm->reference_mode == REFERENCE_MODE_SELECT)
+          counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
+                            [has_second_ref(mbmi)]++;
+
+        if (has_second_ref(mbmi)) {
+          counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
+                          [ref0 == GOLDEN_FRAME]++;
+        } else {
+          counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
+                            [ref0 != LAST_FRAME]++;
+          if (ref0 != LAST_FRAME)
+            counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
+                              [ref0 != GOLDEN_FRAME]++;
+        }
       }
     }
   }
@@ -2168,108 +2172,6 @@ static void switch_tx_mode(VP9_COMP *cpi) {
     cpi->common.tx_mode = ALLOW_32X32;
 }
 
-static void encode_frame_internal(VP9_COMP *cpi) {
-  int mi_row;
-  MACROBLOCK *const x = &cpi->mb;
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-//  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
-//           cpi->common.current_video_frame, cpi->common.show_frame,
-//           cm->frame_type);
-
-  vp9_zero(cm->counts.switchable_interp);
-  vp9_zero(cpi->tx_stepdown_count);
-
-  xd->mi_8x8 = cm->mi_grid_visible;
-  // required for vp9_frame_init_quantizer
-  xd->mi_8x8[0] = cm->mi;
-
-  xd->last_mi = cm->prev_mi;
-
-  vp9_zero(cm->counts.mv);
-  vp9_zero(cpi->coef_counts);
-  vp9_zero(cm->counts.eob_branch);
-
-  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
-      && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
-  switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
-
-  vp9_frame_init_quantizer(cpi);
-
-  vp9_initialize_rd_consts(cpi);
-  vp9_initialize_me_consts(cpi, cm->base_qindex);
-  switch_tx_mode(cpi);
-
-  if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
-    // Initialize encode frame context.
-    init_encode_frame_mb_context(cpi);
-
-    // Build a frame level activity map
-    build_activity_map(cpi);
-  }
-
-  // Re-initialize encode frame context.
-  init_encode_frame_mb_context(cpi);
-
-  vp9_zero(cpi->rd_comp_pred_diff);
-  vp9_zero(cpi->rd_filter_diff);
-  vp9_zero(cpi->rd_tx_select_diff);
-  vp9_zero(cpi->rd_tx_select_threshes);
-
-  set_prev_mi(cm);
-
-  {
-    struct vpx_usec_timer emr_timer;
-    vpx_usec_timer_start(&emr_timer);
-
-    {
-      // Take tiles into account and give start/end MB
-      int tile_col, tile_row;
-      TOKENEXTRA *tp = cpi->tok;
-      const int tile_cols = 1 << cm->log2_tile_cols;
-      const int tile_rows = 1 << cm->log2_tile_rows;
-
-      for (tile_row = 0; tile_row < tile_rows; tile_row++) {
-        for (tile_col = 0; tile_col < tile_cols; tile_col++) {
-          TileInfo tile;
-          TOKENEXTRA *tp_old = tp;
-
-          // For each row of SBs in the frame
-          vp9_tile_init(&tile, cm, tile_row, tile_col);
-          for (mi_row = tile.mi_row_start;
-               mi_row < tile.mi_row_end; mi_row += 8)
-            encode_sb_row(cpi, &tile, mi_row, &tp);
-
-          cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
-          assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
-        }
-      }
-    }
-
-    vpx_usec_timer_mark(&emr_timer);
-    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
-  }
-
-  if (cpi->sf.skip_encode_sb) {
-    int j;
-    unsigned int intra_count = 0, inter_count = 0;
-    for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
-      intra_count += cm->counts.intra_inter[j][0];
-      inter_count += cm->counts.intra_inter[j][1];
-    }
-    cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);
-    cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
-    cpi->sf.skip_encode_frame &= cm->show_frame;
-  } else {
-    cpi->sf.skip_encode_frame = 0;
-  }
-
-#if 0
-  // Keep record of the total distortion this time around for future use
-  cpi->last_frame_distortion = cpi->frame_distortion;
-#endif
-}
 
 static int check_dual_ref_flags(VP9_COMP *cpi) {
   const int ref_flags = cpi->ref_frame_flags;
@@ -2559,28 +2461,18 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                      &dummy_rate, &dummy_dist, 1);
   }
 }
+// end RTC play code
 
-
-static void encode_rtc_frame_internal(VP9_COMP *cpi) {
+static void encode_frame_internal(VP9_COMP *cpi) {
   int mi_row;
-  MACROBLOCK * const x = &cpi->mb;
-  VP9_COMMON * const cm = &cpi->common;
-  MACROBLOCKD * const xd = &x->e_mbd;
+  MACROBLOCK *const x = &cpi->mb;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
 
 //  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
 //           cpi->common.current_video_frame, cpi->common.show_frame,
 //           cm->frame_type);
 
-// debug output
-#if DBG_PRNT_SEGMAP
-  {
-    FILE *statsfile;
-    statsfile = fopen("segmap2.stt", "a");
-    fprintf(statsfile, "\n");
-    fclose(statsfile);
-  }
-#endif
-
   vp9_zero(cm->counts.switchable_interp);
   vp9_zero(cpi->tx_stepdown_count);
 
@@ -2590,7 +2482,7 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
 
   xd->last_mi = cm->prev_mi;
 
-  vp9_zero(cpi->common.counts.mv);
+  vp9_zero(cm->counts.mv);
   vp9_zero(cpi->coef_counts);
   vp9_zero(cm->counts.eob_branch);
 
@@ -2603,7 +2495,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
   vp9_initialize_rd_consts(cpi);
   vp9_initialize_me_consts(cpi, cm->base_qindex);
   switch_tx_mode(cpi);
-  cpi->sf.always_this_block_size = BLOCK_16X16;
 
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
     // Initialize encode frame context.
@@ -2642,9 +2533,12 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
           // For each row of SBs in the frame
           vp9_tile_init(&tile, cm, tile_row, tile_col);
           for (mi_row = tile.mi_row_start;
-               mi_row < tile.mi_row_end; mi_row += 8)
-            encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
-
+               mi_row < tile.mi_row_end; mi_row += 8) {
+            if (cpi->sf.use_pick_mode)
+              encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
+            else
+              encode_sb_row(cpi, &tile, mi_row, &tp);
+          }
           cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
           assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
         }
@@ -2674,8 +2568,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
   cpi->last_frame_distortion = cpi->frame_distortion;
 #endif
 }
-// end RTC play code
-
 
 void vp9_encode_frame(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
@@ -2700,7 +2592,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     }
   }
 
-  if (cpi->sf.RD) {
+  if (cpi->sf.frame_parameter_update) {
     int i;
     REFERENCE_MODE reference_mode;
     /*
@@ -2750,10 +2642,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     select_tx_mode(cpi);
     cm->reference_mode = reference_mode;
 
-    if (cpi->sf.super_fast_rtc)
-      encode_rtc_frame_internal(cpi);
-    else
-      encode_frame_internal(cpi);
+    encode_frame_internal(cpi);
 
     for (i = 0; i < REFERENCE_MODES; ++i) {
       const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
@@ -2833,10 +2722,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
   } else {
     // Force the usage of the BILINEAR interp_filter.
     cm->interp_filter = BILINEAR;
-    if (cpi->sf.super_fast_rtc)
-      encode_rtc_frame_internal(cpi);
-    else
-      encode_frame_internal(cpi);
+    encode_frame_internal(cpi);
   }
 }
 
@@ -2913,7 +2799,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
   const int mi_height = num_8x8_blocks_high_lookup[bsize];
   x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
                    (cpi->oxcf.aq_mode != COMPLEXITY_AQ) &&
-                   !cpi->sf.super_fast_rtc;
+                   !cpi->sf.use_pick_mode;
   x->skip_optimize = ctx->is_coded;
   ctx->is_coded = 1;
   x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 838f74e8c..dc35044d6 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -902,8 +902,8 @@ static double calc_correction_factor(double err_per_mb,
   return fclamp(pow(error_term, power_term), 0.05, 5.0);
 }
 
-static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
-                          int section_target_bandwitdh) {
+int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+                              int section_target_bandwitdh) {
   int q;
   const int num_mbs = cpi->common.MBs;
   int target_norm_bits_per_mb;
@@ -2280,8 +2280,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     // Special case code for first frame.
     const int section_target_bandwidth = (int)(twopass->bits_left /
                                                frames_left);
-    const int tmp_q = estimate_max_q(cpi, &twopass->total_left_stats,
-                                     section_target_bandwidth);
+    const int tmp_q = vp9_twopass_worst_quality(cpi, &twopass->total_left_stats,
+                                                section_target_bandwidth);
 
     rc->active_worst_quality = tmp_q;
     rc->ni_av_qi = tmp_q;
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 054ecf811..7e612183e 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -88,6 +88,8 @@ void vp9_end_first_pass(struct VP9_COMP *cpi);
 void vp9_init_second_pass(struct VP9_COMP *cpi);
 void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
 void vp9_end_second_pass(struct VP9_COMP *cpi);
+int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+                              int section_target_bandwitdh);
 
 // Post encode update of the rate control parameters for 2-pass
 void vp9_twopass_postencode_update(struct VP9_COMP *cpi,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 917d3a4a2..932e8502f 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -842,10 +842,11 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
       sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
       sf->intra_uv_mode_mask[i] = INTRA_DC_ONLY;
     }
-    sf->RD = 0;
+    sf->frame_parameter_update = 0;
   }
   if (speed >= 6) {
-    sf->super_fast_rtc = 1;
+    sf->always_this_block_size = BLOCK_16X16;
+    sf->use_pick_mode = 1;
   }
 }
 
@@ -863,7 +864,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     cpi->mode_chosen_counts[i] = 0;
 
   // best quality defaults
-  sf->RD = 1;
+  sf->frame_parameter_update = 1;
   sf->search_method = NSTEP;
   sf->recode_loop = ALLOW_RECODE;
   sf->subpel_search_method = SUBPEL_TREE;
@@ -904,7 +905,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->use_fast_coef_updates = 0;
   sf->using_small_partition_info = 0;
   sf->mode_skip_start = MAX_MODES;  // Mode index at which mode skip mask set
-  sf->super_fast_rtc = 0;
+  sf->use_pick_mode = 0;
 
   switch (cpi->oxcf.mode) {
     case MODE_BESTQUALITY:
@@ -963,7 +964,7 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) {
   if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
                                cpi->oxcf.width, cpi->oxcf.height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS))
+                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate altref buffer");
 }
@@ -1031,14 +1032,14 @@ static void update_frame_size(VP9_COMP *cpi) {
   if (vp9_realloc_frame_buffer(&cpi->last_frame_uf,
                                cm->width, cm->height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS))
+                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to reallocate last frame buffer");
 
   if (vp9_realloc_frame_buffer(&cpi->scaled_source,
                                cm->width, cm->height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS))
+                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL))
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to reallocate scaled source buffer");
 
@@ -1155,14 +1156,17 @@ static void init_layer_context(VP9_COMP *const cpi) {
     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
     RATE_CONTROL *const lrc = &lc->rc;
     lrc->active_worst_quality = q_trans[oxcf->worst_allowed_q];
-    lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
-    lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
+    lrc->avg_frame_qindex[INTER_FRAME] = lrc->active_worst_quality;
+    lrc->last_q[INTER_FRAME] = lrc->active_worst_quality;
     lrc->ni_av_qi = lrc->active_worst_quality;
     lrc->total_actual_bits = 0;
     lrc->total_target_vs_actual = 0;
     lrc->ni_tot_qi = 0;
     lrc->tot_q = 0.0;
+    lrc->avg_q = 0.0;
     lrc->ni_frames = 0;
+    lrc->decimation_count = 0;
+    lrc->decimation_factor = 0;
     lrc->rate_correction_factor = 1.0;
     lrc->key_frame_rate_correction_factor = 1.0;
     lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] *
@@ -1207,13 +1211,24 @@ static void update_layer_context_change_config(VP9_COMP *const cpi,
 // for the current layer.
 static void update_layer_framerate(VP9_COMP *const cpi) {
   int temporal_layer = cpi->svc.temporal_layer_id;
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
   LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
   RATE_CONTROL *const lrc = &lc->rc;
-  lc->framerate = cpi->oxcf.framerate /
-      cpi->oxcf.ts_rate_decimator[temporal_layer];
-  lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth /
-      lc->framerate);
+  lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+  lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
   lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
+  // Update the average layer frame size (non-cumulative per-frame-bw).
+  if (temporal_layer == 0) {
+    lc->avg_frame_size = lrc->av_per_frame_bandwidth;
+  } else {
+    double prev_layer_framerate = oxcf->framerate /
+        oxcf->ts_rate_decimator[temporal_layer - 1];
+    int prev_layer_target_bandwidth =
+        oxcf->ts_target_bitrate[temporal_layer - 1] * 1000;
+    lc->avg_frame_size =
+        (int)(lc->target_bandwidth - prev_layer_target_bandwidth) /
+        (lc->framerate - prev_layer_framerate);
+  }
 }
 
 // Prior to encoding the frame, set the layer context, for the current layer
@@ -1729,11 +1744,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
                                sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
   }
 
-#ifdef ENTROPY_STATS
-  if (cpi->pass != 1)
-    init_context_counters();
-#endif
-
   /*Initialize the feed-forward activity masking.*/
   cpi->activity_avg = 90 << 12;
   cpi->key_frame_frequency = cpi->oxcf.key_freq;
@@ -2664,7 +2674,7 @@ static void scale_references(VP9_COMP *cpi) {
       vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
                                cm->width, cm->height,
                                cm->subsampling_x, cm->subsampling_y,
-                               VP9_ENC_BORDER_IN_PIXELS);
+                               VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
       scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
       cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
     } else {
@@ -2767,10 +2777,10 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
 static void encode_without_recode_loop(VP9_COMP *cpi,
                                        size_t *size,
                                        uint8_t *dest,
-                                       int *q) {
+                                       int q) {
   VP9_COMMON *const cm = &cpi->common;
   vp9_clear_system_state();  // __asm emms;
-  vp9_set_quantizer(cpi, *q);
+  vp9_set_quantizer(cpi, q);
 
   // Set up entropy context depending on frame type. The decoder mandates
   // the use of the default context, index 0, for keyframes and inter
@@ -2804,7 +2814,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
 static void encode_with_recode_loop(VP9_COMP *cpi,
                                     size_t *size,
                                     uint8_t *dest,
-                                    int *q,
+                                    int q,
                                     int bottom_index,
                                     int top_index) {
   VP9_COMMON *const cm = &cpi->common;
@@ -2824,7 +2834,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
   do {
     vp9_clear_system_state();  // __asm emms;
 
-    vp9_set_quantizer(cpi, *q);
+    vp9_set_quantizer(cpi, q);
 
     if (loop_count == 0) {
       // Set up entropy context depending on frame type. The decoder mandates
@@ -2865,7 +2875,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
     if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
       vp9_save_coding_context(cpi);
       cpi->dummy_packing = 1;
-      if (!cpi->sf.super_fast_rtc)
+      if (!cpi->sf.use_pick_mode)
         vp9_pack_bitstream(cpi, dest, size);
 
       cpi->rc.projected_frame_size = (*size) << 3;
@@ -2881,7 +2891,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
       if ((cm->frame_type == KEY_FRAME) &&
            cpi->rc.this_key_frame_forced &&
            (cpi->rc.projected_frame_size < cpi->rc.max_frame_bandwidth)) {
-        int last_q = *q;
+        int last_q = q;
         int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
 
         int high_err_target = cpi->ambient_err;
@@ -2897,32 +2907,32 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
             (kf_err > low_err_target &&
              cpi->rc.projected_frame_size <= frame_under_shoot_limit)) {
           // Lower q_high
-          q_high = *q > q_low ? *q - 1 : q_low;
+          q_high = q > q_low ? q - 1 : q_low;
 
           // Adjust Q
-          *q = ((*q) * high_err_target) / kf_err;
-          *q = MIN((*q), (q_high + q_low) >> 1);
+          q = (q * high_err_target) / kf_err;
+          q = MIN(q, (q_high + q_low) >> 1);
         } else if (kf_err < low_err_target &&
                    cpi->rc.projected_frame_size >= frame_under_shoot_limit) {
           // The key frame is much better than the previous frame
           // Raise q_low
-          q_low = *q < q_high ? *q + 1 : q_high;
+          q_low = q < q_high ? q + 1 : q_high;
 
           // Adjust Q
-          *q = ((*q) * low_err_target) / kf_err;
-          *q = MIN((*q), (q_high + q_low + 1) >> 1);
+          q = (q * low_err_target) / kf_err;
+          q = MIN(q, (q_high + q_low + 1) >> 1);
         }
 
         // Clamp Q to upper and lower limits:
-        *q = clamp(*q, q_low, q_high);
+        q = clamp(q, q_low, q_high);
 
-        loop = *q != last_q;
+        loop = q != last_q;
       } else if (recode_loop_test(
           cpi, frame_over_shoot_limit, frame_under_shoot_limit,
-          *q, MAX(q_high, top_index), bottom_index)) {
+          q, MAX(q_high, top_index), bottom_index)) {
         // Is the projected frame size out of range and are we allowed
         // to attempt to recode.
-        int last_q = *q;
+        int last_q = q;
         int retries = 0;
 
         // Frame size out of permitted range:
@@ -2935,23 +2945,23 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
             q_high = cpi->rc.worst_quality;
 
           // Raise Qlow as to at least the current value
-          q_low = *q < q_high ? *q + 1 : q_high;
+          q_low = q < q_high ? q + 1 : q_high;
 
           if (undershoot_seen || loop_count > 1) {
             // Update rate_correction_factor unless
             vp9_rc_update_rate_correction_factors(cpi, 1);
 
-            *q = (q_high + q_low + 1) / 2;
+            q = (q_high + q_low + 1) / 2;
           } else {
             // Update rate_correction_factor unless
             vp9_rc_update_rate_correction_factors(cpi, 0);
 
-            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+            q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                    bottom_index, MAX(q_high, top_index));
 
-            while (*q < q_low && retries < 10) {
+            while (q < q_low && retries < 10) {
               vp9_rc_update_rate_correction_factors(cpi, 0);
-              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+              q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                      bottom_index, MAX(q_high, top_index));
               retries++;
             }
@@ -2960,27 +2970,27 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
           overshoot_seen = 1;
         } else {
           // Frame is too small
-          q_high = *q > q_low ? *q - 1 : q_low;
+          q_high = q > q_low ? q - 1 : q_low;
 
           if (overshoot_seen || loop_count > 1) {
             vp9_rc_update_rate_correction_factors(cpi, 1);
-            *q = (q_high + q_low) / 2;
+            q = (q_high + q_low) / 2;
           } else {
             vp9_rc_update_rate_correction_factors(cpi, 0);
-            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+            q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                    bottom_index, top_index);
             // Special case reset for qlow for constrained quality.
             // This should only trigger where there is very substantial
             // undershoot on a frame and the auto cq level is above
             // the user passsed in value.
             if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
-                *q < q_low) {
-              q_low = *q;
+                q < q_low) {
+              q_low = q;
             }
 
-            while (*q > q_high && retries < 10) {
+            while (q > q_high && retries < 10) {
               vp9_rc_update_rate_correction_factors(cpi, 0);
-              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+              q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                      bottom_index, top_index);
               retries++;
             }
@@ -2990,9 +3000,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
         }
 
         // Clamp Q to upper and lower limits:
-        *q = clamp(*q, q_low, q_high);
+        q = clamp(q, q_low, q_high);
 
-        loop = *q != last_q;
+        loop = q != last_q;
       } else {
         loop = 0;
       }
@@ -3210,9 +3220,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   }
 
   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
-    encode_without_recode_loop(cpi, size, dest, &q);
+    encode_without_recode_loop(cpi, size, dest, q);
   } else {
-    encode_with_recode_loop(cpi, size, dest, &q, bottom_index, top_index);
+    encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index);
   }
 
   // Special case code to reduce pulsing when key frames are forced at a
@@ -3272,10 +3282,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     }
   }
 
-#ifdef ENTROPY_STATS
-  vp9_update_mode_context_stats(cpi);
-#endif
-
 #if 0
   output_frame_level_debug_stats(cpi);
 #endif
@@ -3678,7 +3684,7 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
   vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
                            cm->width, cm->height,
                            cm->subsampling_x, cm->subsampling_y,
-                           VP9_ENC_BORDER_IN_PIXELS);
+                           VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
 
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
     const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 5fd8e5a54..243ca1123 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -45,7 +45,7 @@ extern "C" {
 #else
 #define MIN_GF_INTERVAL             4
 #endif
-#define DEFAULT_GF_INTERVAL         11
+#define DEFAULT_GF_INTERVAL         10
 #define DEFAULT_KF_BOOST            2000
 #define DEFAULT_GF_BOOST            2000
 
@@ -209,8 +209,8 @@ typedef enum {
 } RECODE_LOOP_TYPE;
 
 typedef struct {
-  // This flag refers to whether or not to perform rd optimization.
-  int RD;
+  // Frame level coding parameter update
+  int frame_parameter_update;
 
   // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
   SEARCH_METHODS search_method;
@@ -390,8 +390,8 @@ typedef struct {
   // by only looking at counts from 1/2 the bands.
   int use_fast_coef_updates;  // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
 
-  // This flag control the use of the new super fast rtc mode
-  int super_fast_rtc;
+  // This flag controls the use of non-RD mode decision.
+  int use_pick_mode;
 } SPEED_FEATURES;
 
 typedef struct {
@@ -401,6 +401,7 @@ typedef struct {
   int64_t optimal_buffer_level;
   int64_t maximum_buffer_size;
   double framerate;
+  int avg_frame_size;
 } LAYER_CONTEXT;
 
 typedef struct VP9_COMP {
@@ -691,8 +692,6 @@ void vp9_encode_frame(VP9_COMP *cpi);
 
 void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
 
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
-
 void vp9_set_speed_features(VP9_COMP *cpi);
 
 int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 1aaa4162b..512b6bf18 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -170,6 +170,10 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   mbmi->ref_frame[1] = NONE;
   mbmi->tx_size = MIN(max_txsize_lookup[bsize],
                       tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+  mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ?
+                        EIGHTTAP : cpi->common.interp_filter;
+  mbmi->skip_coeff = 0;
+  mbmi->segment_id = 0;
 
   for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) {
     x->pred_mv_sad[ref_frame] = INT_MAX;
@@ -219,15 +223,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         mbmi->ref_frame[0] = ref_frame;
         mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
         xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
-        mbmi->interp_filter = cpi->common.interp_filter == SWITCHABLE ?
-            EIGHTTAP : cpi->common.interp_filter;
-
-        mbmi->ref_frame[1] = INTRA_FRAME;
-        mbmi->tx_size = max_txsize_lookup[bsize];
         mbmi->uv_mode = this_mode;
-        mbmi->skip_coeff = 0;
-        mbmi->sb_type = bsize;
-        mbmi->segment_id = 0;
       }
     }
   }
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index a2eea1cd7..862573f3f 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -79,55 +79,47 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
                             const int16_t *dequant_ptr,
                             int zbin_oq_value, uint16_t *eob_ptr,
                             const int16_t *scan, const int16_t *iscan) {
-  int i, rc, eob;
-  int zbins[2], nzbins[2];
-  int x, y, z, sz;
+  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1),
+                         ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) };
+  const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+
   int idx = 0;
   int idx_arr[1024];
+  int i, eob = -1;
 
-  vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
-  vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
-
-  eob = -1;
-
-  // Base ZBIN
-  zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1);
-  zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1);
-  nzbins[0] = zbins[0] * -1;
-  nzbins[1] = zbins[1] * -1;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
 
   if (!skip_block) {
     // Pre-scan pass
     for (i = 0; i < n_coeffs; i++) {
-      rc = scan[i];
-      z = coeff_ptr[rc];
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
 
       // If the coefficient is out of the base ZBIN range, keep it for
       // quantization.
-      if (z >= zbins[rc != 0] || z <= nzbins[rc != 0])
+      if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
         idx_arr[idx++] = i;
     }
 
     // Quantization pass: only process the coefficients selected in
     // pre-scan pass. Note: idx can be zero.
     for (i = 0; i < idx; i++) {
-      rc = scan[idx_arr[i]];
-
-      z = coeff_ptr[rc];
-      sz = (z >> 31);                               // sign of z
-      x  = (z ^ sz) - sz;                           // x = abs(z)
-
-      x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
-      x  = clamp(x, INT16_MIN, INT16_MAX);
-      y  = ((((x * quant_ptr[rc != 0]) >> 16) + x) *
-            quant_shift_ptr[rc != 0]) >> 15;      // quantize (x)
-
-      x  = (y ^ sz) - sz;                         // get the sign back
-      qcoeff_ptr[rc]  = x;                        // write to destination
-      dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / 2;  // dequantized value
-
-      if (y)
-        eob = idx_arr[i];                         // last nonzero coeffs
+      const int rc = scan[idx_arr[i]];
+      const int coeff = coeff_ptr[rc];
+      const int coeff_sign = (coeff >> 31);
+      int tmp;
+      int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+      abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+      tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
+               quant_shift_ptr[rc != 0]) >> 15;
+
+      qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+
+      if (tmp)
+        eob = idx_arr[i];
     }
   }
   *eob_ptr = eob + 1;
@@ -136,8 +128,8 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
 void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                 const int16_t *scan, const int16_t *iscan) {
   MACROBLOCKD *const xd = &x->e_mbd;
-  struct macroblock_plane* p = &x->plane[plane];
-  struct macroblockd_plane* pd = &xd->plane[plane];
+  struct macroblock_plane *p = &x->plane[plane];
+  struct macroblockd_plane *pd = &xd->plane[plane];
 
   vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
            16, x->skip_block,
@@ -223,38 +215,30 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
 }
 
 void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
-  int i;
-  VP9_COMMON *const cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
-  int zbin_extra;
-  int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
-  const int qindex = vp9_get_qindex(&cpi->common.seg, segment_id,
-                                    cpi->common.base_qindex);
-
-  int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
+  const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
+  const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+  const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
+  const int zbin = cpi->zbin_mode_boost + x->act_zbin_adj;
+  int i;
 
   // Y
-  zbin_extra = (cpi->common.y_dequant[qindex][1] *
-                 (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7;
-
   x->plane[0].quant = cpi->y_quant[qindex];
   x->plane[0].quant_shift = cpi->y_quant_shift[qindex];
   x->plane[0].zbin = cpi->y_zbin[qindex];
   x->plane[0].round = cpi->y_round[qindex];
-  x->plane[0].zbin_extra = (int16_t)zbin_extra;
-  x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex];
+  x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7);
+  xd->plane[0].dequant = cm->y_dequant[qindex];
 
   // UV
-  zbin_extra = (cpi->common.uv_dequant[qindex][1] *
-                (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7;
-
   for (i = 1; i < 3; i++) {
     x->plane[i].quant = cpi->uv_quant[qindex];
     x->plane[i].quant_shift = cpi->uv_quant_shift[qindex];
     x->plane[i].zbin = cpi->uv_zbin[qindex];
     x->plane[i].round = cpi->uv_round[qindex];
-    x->plane[i].zbin_extra = (int16_t)zbin_extra;
-    x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex];
+    x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7);
+    xd->plane[i].dequant = cm->uv_dequant[qindex];
   }
 
 #if CONFIG_ALPHA
@@ -263,18 +247,14 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
   x->plane[3].zbin = cpi->a_zbin[qindex];
   x->plane[3].round = cpi->a_round[qindex];
   x->plane[3].zbin_extra = (int16_t)zbin_extra;
-  x->e_mbd.plane[3].dequant = cpi->common.a_dequant[qindex];
+  xd->plane[3].dequant = cm->a_dequant[qindex];
 #endif
 
-  x->skip_block = vp9_segfeature_active(&cpi->common.seg, segment_id,
-                                        SEG_LVL_SKIP);
-
-  /* save this macroblock QIndex for vp9_update_zbin_extra() */
+  x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
   x->q_index = qindex;
 
-  /* R/D setup */
-  cpi->mb.errorperbit = rdmult >> 6;
-  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
+  x->errorperbit = rdmult >> 6;
+  x->errorperbit += (x->errorperbit == 0);
 
   vp9_initialize_me_consts(cpi, x->q_index);
 }
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 04539c845..39fea09dd 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -215,7 +215,7 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
                                    rc->av_per_frame_bandwidth >> 5);
   if (target < min_frame_target)
     target = min_frame_target;
-  if (cpi->refresh_golden_frame && rc->source_alt_ref_active) {
+  if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) {
     // If there is an active ARF at this location use the minimum
     // bits on this frame even if it is a constructed arf.
     // The active maximum quantizer insures that an appropriate
@@ -487,8 +487,7 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
       double q_adj_factor = 1.0;
       double q_val;
 
-      // Baseline value derived from cpi->active_worst_quality and kf boost
-      active_best_quality = get_active_quality(active_worst_quality,
+      active_best_quality = get_active_quality(rc->avg_frame_qindex[KEY_FRAME],
                                                rc->kf_boost,
                                                kf_low, kf_high,
                                                kf_low_motion_minq,
@@ -521,7 +520,8 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
         rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
       q = rc->avg_frame_qindex[INTER_FRAME];
     } else {
-      q = active_worst_quality;
+      q = (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ?
+          active_worst_quality : rc->avg_frame_qindex[KEY_FRAME];
     }
     // For constrained quality dont allow Q less than the cq level
     if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
@@ -565,10 +565,24 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
       active_best_quality = cpi->cq_target_quality;
     } else {
       // Use the lower of active_worst_quality and recent/average Q.
-      if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
-       active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
-      else
-        active_best_quality = inter_minq[active_worst_quality];
+      if (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+        if (cm->current_video_frame > 1) {
+          if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
+            active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+          else
+            active_best_quality = inter_minq[active_worst_quality];
+        } else {
+          if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality)
+            active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+          else
+            active_best_quality = inter_minq[active_worst_quality];
+        }
+      } else {
+        if (cm->current_video_frame > 1)
+          active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+        else
+          active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+      }
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
       if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
@@ -850,7 +864,7 @@ int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
   // JBB : This is realtime mode.  In real time mode the first frame
   // should be larger. Q of 0 is disabled because we force tx size to be
   // 16x16...
-  if (cpi->sf.super_fast_rtc) {
+  if (cpi->sf.use_pick_mode) {
     if (cpi->common.current_video_frame == 0)
       q /= 3;
     if (q == 0)
@@ -973,7 +987,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
     rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(
         3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2);
   } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+      (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
+      !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) {
     rc->last_q[2] = cm->base_qindex;
     rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(
         3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2);
@@ -1056,7 +1071,7 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) {
 #define USE_ALTREF_FOR_ONE_PASS   1
 
 static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
-  static const int af_ratio = 5;
+  static const int af_ratio = 10;
   const RATE_CONTROL *rc = &cpi->rc;
   int target;
 #if USE_ALTREF_FOR_ONE_PASS
@@ -1073,12 +1088,42 @@ static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
 }
 
 static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
-  static const int kf_ratio = 12;
+  static const int kf_ratio = 25;
   const RATE_CONTROL *rc = &cpi->rc;
   int target = rc->av_per_frame_bandwidth * kf_ratio;
   return vp9_rc_clamp_iframe_target_size(cpi, target);
 }
 
+static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) {
+  int active_worst_quality;
+  if (cpi->common.frame_type == KEY_FRAME) {
+    if (cpi->common.current_video_frame == 0) {
+      active_worst_quality = cpi->rc.worst_quality;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+    }
+  } else if (!cpi->rc.is_src_frame_alt_ref &&
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+    if (cpi->common.current_video_frame == 1) {
+      active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 5 / 4;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      active_worst_quality = cpi->rc.last_q[INTER_FRAME];
+    }
+  } else {
+    if (cpi->common.current_video_frame == 1) {
+      active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2;
+    }
+  }
+  if (active_worst_quality > cpi->rc.worst_quality)
+    active_worst_quality = cpi->rc.worst_quality;
+  return active_worst_quality;
+}
+
 void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
@@ -1094,22 +1139,8 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
     rc->frames_to_key = cpi->key_frame_frequency;
     rc->kf_boost = DEFAULT_KF_BOOST;
     rc->source_alt_ref_active = 0;
-    if (cm->current_video_frame == 0) {
-      rc->active_worst_quality = rc->worst_quality;
-    } else {
-      // Choose active worst quality twice as large as the last q.
-      rc->active_worst_quality = MIN(rc->worst_quality,
-                                     rc->last_q[KEY_FRAME] * 2);
-    }
   } else {
     cm->frame_type = INTER_FRAME;
-    if (cm->current_video_frame == 1) {
-      rc->active_worst_quality = rc->worst_quality;
-    } else {
-      // Choose active worst quality twice as large as the last q.
-      rc->active_worst_quality = MIN(rc->worst_quality,
-                                     rc->last_q[INTER_FRAME] * 2);
-    }
   }
   if (rc->frames_till_gf_update_due == 0) {
     rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
@@ -1121,6 +1152,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
     rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
     rc->gfu_boost = DEFAULT_GF_BOOST;
   }
+  cpi->rc.active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
   if (cm->frame_type == KEY_FRAME)
     target = calc_iframe_target_size_one_pass_vbr(cpi);
   else
@@ -1139,13 +1171,15 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
   const RATE_CONTROL *rc = &cpi->rc;
   int active_worst_quality = rc->active_worst_quality;
   // Maximum limit for down adjustment, ~20%.
-  int max_adjustment_down = active_worst_quality / 5;
   // Buffer level below which we push active_worst to worst_quality.
   int critical_level = oxcf->optimal_buffer_level >> 2;
   int adjustment = 0;
   int buff_lvl_step = 0;
+  if (cpi->common.frame_type == KEY_FRAME)
+    return rc->worst_quality;
   if (rc->buffer_level > oxcf->optimal_buffer_level) {
     // Adjust down.
+    int max_adjustment_down = active_worst_quality / 5;
     if (max_adjustment_down) {
       buff_lvl_step = (int)((oxcf->maximum_buffer_size -
           oxcf->optimal_buffer_level) / max_adjustment_down);
@@ -1175,11 +1209,20 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
 static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   const RATE_CONTROL *rc = &cpi->rc;
-  int target = rc->av_per_frame_bandwidth;
-  const int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
-                                   FRAME_OVERHEAD_BITS);
   const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
   const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
+  int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
+                             FRAME_OVERHEAD_BITS);
+  int target = rc->av_per_frame_bandwidth;
+  if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    // Note that for layers, av_per_frame_bandwidth is the cumulative
+    // per-frame-bandwidth. For the target size of this frame, use the
+    // layer average frame size (i.e., non-cumulative per-frame-bw).
+    int current_temporal_layer = cpi->svc.temporal_layer_id;
+    const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer];
+    target = lc->avg_frame_size;
+    min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
+  }
   if (diff > 0) {
     // Lower the target bandwidth for this frame.
     const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
@@ -1219,16 +1262,15 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
     cpi->rc.source_alt_ref_active = 0;
     if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
       target = calc_iframe_target_size_one_pass_cbr(cpi);
-      cpi->rc.active_worst_quality = cpi->rc.worst_quality;
     }
   } else {
     cm->frame_type = INTER_FRAME;
     if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
       target = calc_pframe_target_size_one_pass_cbr(cpi);
-      cpi->rc.active_worst_quality =
-          calc_active_worst_quality_one_pass_cbr(cpi);
     }
   }
+  cpi->rc.active_worst_quality =
+      calc_active_worst_quality_one_pass_cbr(cpi);
   vp9_rc_set_frame_target(cpi, target);
   cpi->rc.frames_till_gf_update_due = INT_MAX;
   cpi->rc.baseline_gf_interval = INT_MAX;
@@ -1249,12 +1291,12 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
     rc->kf_boost = DEFAULT_KF_BOOST;
     rc->source_alt_ref_active = 0;
     target = calc_iframe_target_size_one_pass_cbr(cpi);
-    rc->active_worst_quality = rc->worst_quality;
   } else {
     cm->frame_type = INTER_FRAME;
     target = calc_pframe_target_size_one_pass_cbr(cpi);
-    rc->active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
   }
+  cpi->rc.active_worst_quality =
+      calc_active_worst_quality_one_pass_cbr(cpi);
   vp9_rc_set_frame_target(cpi, target);
   // Don't use gf_update by default in CBR mode.
   rc->frames_till_gf_update_due = INT_MAX;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 7b17b8582..4408353ab 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -285,7 +285,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
   cpi->RDDIV = RDDIV_BITS;  // in bits (to multiply D by 128)
   cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);
 
-  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO + (x->errorperbit == 0);
+  x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
+  x->errorperbit += (x->errorperbit == 0);
 
   vp9_set_speed_features(cpi);
 
@@ -296,7 +297,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
 
   fill_token_costs(x->token_costs, cm->fc.coef_probs);
 
-  if (!cpi->sf.super_fast_rtc) {
+  if (!cpi->sf.use_pick_mode) {
     for (i = 0; i < PARTITION_CONTEXTS; i++)
       vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
                       vp9_partition_tree);
@@ -443,7 +444,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
 
     if (i == 0)
       x->pred_sse[ref] = sse;
-    if (cpi->sf.super_fast_rtc) {
+    if (cpi->sf.use_pick_mode) {
       dist_sum += (int)sse;
     } else {
       int rate;
@@ -1645,14 +1646,15 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                     BEST_SEG_INFO *bsi_buf, int filter_idx,
                                     int_mv seg_mvs[4][MAX_REF_FRAMES],
                                     int mi_row, int mi_col) {
-  int i, br = 0, idx, idy;
+  int k, br = 0, idx, idy;
   int64_t bd = 0, block_sse = 0;
   MB_PREDICTION_MODE this_mode;
+  MACROBLOCKD *xd = &x->e_mbd;
   VP9_COMMON *cm = &cpi->common;
-  MODE_INFO *mi = x->e_mbd.mi_8x8[0];
+  MODE_INFO *mi = xd->mi_8x8[0];
   MB_MODE_INFO *const mbmi = &mi->mbmi;
   struct macroblock_plane *const p = &x->plane[0];
-  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
+  struct macroblockd_plane *const pd = &xd->plane[0];
   const int label_count = 4;
   int64_t this_segment_rd = 0;
   int label_mv_thresh;
@@ -1660,7 +1662,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
-  vp9_variance_fn_ptr_t *v_fn_ptr;
+  vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
   ENTROPY_CONTEXT t_above[2], t_left[2];
   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
   int mode_idx;
@@ -1670,8 +1672,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
   vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
 
-  v_fn_ptr = &cpi->fn_ptr[bsize];
-
   // 64 makes this threshold really big effectively
   // making it so that we very rarely check mvs on
   // segments.   setting this to 1 would make mv thresh
@@ -1687,20 +1687,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
       MB_PREDICTION_MODE mode_selected = ZEROMV;
       int64_t best_rd = INT64_MAX;
-      i = idy * 2 + idx;
-
-      frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
-      vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
-                                    i, 0, mi_row, mi_col,
-                                    &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
-                                    &frame_mv[NEARMV][mbmi->ref_frame[0]]);
-      if (has_second_rf) {
-        frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
-        vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
-                                      i, 1, mi_row, mi_col,
-                                      &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
-                                      &frame_mv[NEARMV][mbmi->ref_frame[1]]);
+      const int i = idy * 2 + idx;
+      int ref;
+
+      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+        frame_mv[ZEROMV][frame].as_int = 0;
+        vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
+                                      &frame_mv[NEARESTMV][frame],
+                                      &frame_mv[NEARMV][frame]);
       }
+
       // search for the best motion vector on this segment
       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
         const struct buf_2d orig_src = x->plane[0].src;
@@ -2042,8 +2039,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   bsi->sse = block_sse;
 
   // update the coding decisions
-  for (i = 0; i < 4; ++i)
-    bsi->modes[i] = mi->bmi[i].as_mode;
+  for (k = 0; k < 4; ++k)
+    bsi->modes[k] = mi->bmi[k].as_mode;
 }
 
 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index f9ba41b22..85e83b834 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -23,6 +23,8 @@ VP9_COMMON_SRCS-yes += common/vp9_entropymode.c
 VP9_COMMON_SRCS-yes += common/vp9_entropymv.c
 VP9_COMMON_SRCS-yes += common/vp9_filter.c
 VP9_COMMON_SRCS-yes += common/vp9_filter.h
+VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.c
+VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.h
 VP9_COMMON_SRCS-yes += common/generic/vp9_systemdependent.c
 VP9_COMMON_SRCS-yes += common/vp9_idct.c
 VP9_COMMON_SRCS-yes += common/vp9_alloccommon.h
@@ -82,7 +84,7 @@ VP9_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp9_postproc_mmx.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_postproc_sse2.asm
 endif
 
-ifeq ($(USE_X86INC),yes)
+ifeq ($(CONFIG_USE_X86INC),yes)
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_copy_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_intrapred_sse2.asm
 VP9_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/vp9_intrapred_ssse3.asm
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 85a3b1117..ece6d5280 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -885,10 +885,9 @@ static const vpx_codec_cx_pkt_t *vp9e_get_cxdata(vpx_codec_alg_priv_t  *ctx,
 static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx,
                                           int ctr_id,
                                           va_list args) {
-  vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
+  vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *);
 
-  if (data) {
-    vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data;
+  if (frame != NULL) {
     YV12_BUFFER_CONFIG sd;
 
     image2yuvconfig(&frame->img, &sd);
@@ -903,10 +902,9 @@ static vpx_codec_err_t vp9e_set_reference(vpx_codec_alg_priv_t *ctx,
 static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx,
                                            int ctr_id,
                                            va_list args) {
-  vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *);
+  vpx_ref_frame_t *frame = va_arg(args, vpx_ref_frame_t *);
 
-  if (data) {
-    vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data;
+  if (frame != NULL) {
     YV12_BUFFER_CONFIG sd;
 
     image2yuvconfig(&frame->img, &sd);
@@ -921,13 +919,13 @@ static vpx_codec_err_t vp9e_copy_reference(vpx_codec_alg_priv_t *ctx,
 static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx,
                                      int ctr_id,
                                      va_list args) {
-  vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *);
+  vp9_ref_frame_t *frame = va_arg(args, vp9_ref_frame_t *);
 
-  if (data) {
+  if (frame != NULL) {
     YV12_BUFFER_CONFIG* fb;
 
-    vp9_get_reference_enc(ctx->cpi, data->idx, &fb);
-    yuvconfig2image(&data->img, fb, NULL);
+    vp9_get_reference_enc(ctx->cpi, frame->idx, &fb);
+    yuvconfig2image(&frame->img, fb, NULL);
     return VPX_CODEC_OK;
   } else {
     return VPX_CODEC_INVALID_PARAM;
@@ -938,11 +936,11 @@ static vpx_codec_err_t vp9e_set_previewpp(vpx_codec_alg_priv_t *ctx,
                                           int ctr_id,
                                           va_list args) {
 #if CONFIG_VP9_POSTPROC
-  vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
+  vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *);
   (void)ctr_id;
 
-  if (data) {
-    ctx->preview_ppcfg = *((vp8_postproc_cfg_t *)data);
+  if (config != NULL) {
+    ctx->preview_ppcfg = *config;
     return VPX_CODEC_OK;
   } else {
     return VPX_CODEC_INVALID_PARAM;
@@ -1016,14 +1014,13 @@ static vpx_codec_err_t vp9e_set_activemap(vpx_codec_alg_priv_t *ctx,
 static vpx_codec_err_t vp9e_set_scalemode(vpx_codec_alg_priv_t *ctx,
                                           int ctr_id,
                                           va_list args) {
-  vpx_scaling_mode_t *data =  va_arg(args, vpx_scaling_mode_t *);
+  vpx_scaling_mode_t *scalemode =  va_arg(args, vpx_scaling_mode_t *);
 
-  if (data) {
+  if (scalemode != NULL) {
     int res;
-    vpx_scaling_mode_t scalemode = *(vpx_scaling_mode_t *)data;
     res = vp9_set_internal_size(ctx->cpi,
-                                (VPX_SCALING)scalemode.h_scaling_mode,
-                                (VPX_SCALING)scalemode.v_scaling_mode);
+                                (VPX_SCALING)scalemode->h_scaling_mode,
+                                (VPX_SCALING)scalemode->v_scaling_mode);
 
     if (!res) {
       return VPX_CODEC_OK;
@@ -1070,29 +1067,23 @@ static vpx_codec_err_t vp9e_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
 
 static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
                                                int ctr_id, va_list args) {
-  vpx_svc_parameters_t *data = va_arg(args, vpx_svc_parameters_t *);
   VP9_COMP *cpi = (VP9_COMP *)ctx->cpi;
-  vpx_svc_parameters_t params;
+  vpx_svc_parameters_t *params = va_arg(args, vpx_svc_parameters_t *);
 
-  if (data == NULL) {
-    return VPX_CODEC_INVALID_PARAM;
-  }
+  if (params == NULL) return VPX_CODEC_INVALID_PARAM;
 
-  params = *(vpx_svc_parameters_t *)data;
+  cpi->svc.spatial_layer_id = params->spatial_layer;
+  cpi->svc.temporal_layer_id = params->temporal_layer;
 
-  cpi->svc.spatial_layer_id = params.spatial_layer;
-  cpi->svc.temporal_layer_id = params.temporal_layer;
+  cpi->lst_fb_idx = params->lst_fb_idx;
+  cpi->gld_fb_idx = params->gld_fb_idx;
+  cpi->alt_fb_idx = params->alt_fb_idx;
 
-  cpi->lst_fb_idx = params.lst_fb_idx;
-  cpi->gld_fb_idx = params.gld_fb_idx;
-  cpi->alt_fb_idx = params.alt_fb_idx;
-
-  if (vp9_set_size_literal(ctx->cpi, params.width, params.height) != 0) {
+  if (vp9_set_size_literal(ctx->cpi, params->width, params->height) != 0)
     return VPX_CODEC_INVALID_PARAM;
-  }
 
-  ctx->cfg.rc_max_quantizer = params.max_quantizer;
-  ctx->cfg.rc_min_quantizer = params.min_quantizer;
+  ctx->cfg.rc_max_quantizer = params->max_quantizer;
+  ctx->cfg.rc_min_quantizer = params->min_quantizer;
 
   set_vp9e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg);
   vp9_change_config(ctx->cpi, &ctx->oxcf);
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 92c6cd20c..881a7d152 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -15,6 +15,7 @@
 #include "vpx/vp8dx.h"
 #include "vpx/internal/vpx_codec_internal.h"
 #include "./vpx_version.h"
+#include "vp9/common/vp9_frame_buffers.h"
 #include "vp9/decoder/vp9_onyxd.h"
 #include "vp9/decoder/vp9_onyxd_int.h"
 #include "vp9/decoder/vp9_read_bit_buffer.h"
@@ -148,7 +149,9 @@ static vpx_codec_err_t vp9_peek_si(const uint8_t *data, unsigned int data_sz,
   {
     struct vp9_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
     const int frame_marker = vp9_rb_read_literal(&rb, 2);
-    const int version = vp9_rb_read_bit(&rb) | (vp9_rb_read_bit(&rb) << 1);
+    const int version = vp9_rb_read_bit(&rb);
+    (void) vp9_rb_read_bit(&rb);  // unused version bit
+
     if (frame_marker != VP9_FRAME_MARKER)
       return VPX_CODEC_UNSUP_BITSTREAM;
 #if CONFIG_NON420
@@ -291,10 +294,22 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
         ctx->postproc_cfg.noise_level = 0;
       }
 
-      if (!optr)
+      if (!optr) {
         res = VPX_CODEC_ERROR;
-      else
+      } else {
+        VP9D_COMP *const pbi = (VP9D_COMP*)optr;
+        VP9_COMMON *const cm = &pbi->common;
+
+        cm->get_fb_cb = vp9_get_frame_buffer;
+        cm->release_fb_cb = vp9_release_frame_buffer;
+
+        if (vp9_alloc_internal_frame_buffers(&cm->int_frame_buffers))
+          vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                             "Failed to initialize internal frame buffers");
+        cm->cb_priv = &cm->int_frame_buffers;
+
         ctx->pbi = optr;
+      }
     }
 
     ctx->decoder_init = 1;
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 64f9f094c..c0d973b4f 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -89,7 +89,7 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subpel_variance_impl_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE3) += encoder/x86/vp9_sad_sse3.asm
 
-ifeq ($(USE_X86INC),yes)
+ifeq ($(CONFIG_USE_X86INC),yes)
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad_sse2.asm
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_subtract_sse2.asm