28 files changed, 707 insertions, 630 deletions
diff --git a/vp9/common/vp9_mv.h b/vp9/common/vp9_mv.h
index 3eb7f9d61..5d89da8c2 100644
--- a/vp9/common/vp9_mv.h
+++ b/vp9/common/vp9_mv.h
@@ -34,6 +34,18 @@ typedef struct mv32 {
   int32_t col;
 } MV32;
 
+// Returns nonzero when both components of *mv are zero. The fields are
+// compared directly rather than reading the struct through a uint32_t
+// pointer, which would violate C's effective-type (strict aliasing) rules.
+static INLINE int is_zero_mv(const MV *mv) {
+  return mv->row == 0 && mv->col == 0;
+}
+
+// Returns nonzero when *a and *b have identical row and col components.
+static INLINE int is_equal_mv(const MV *a, const MV *b) {
+  return a->row == b->row && a->col == b->col;
+}
+
 static INLINE void clamp_mv(MV *mv, int min_col, int max_col,
                             int min_row, int max_row) {
   mv->col = clamp(mv->col, min_col, max_col);
diff --git a/vp9/common/vp9_postproc.c b/vp9/common/vp9_postproc.c
index abda4e682..e4e6ce782 100644
--- a/vp9/common/vp9_postproc.c
+++ b/vp9/common/vp9_postproc.c
@@ -366,6 +366,9 @@ void vp9_plane_add_noise_c(uint8_t *start, char *noise,
                            unsigned int width, unsigned int height, int pitch) {
   unsigned int i, j;
 
+  // TODO(jbb): why does simd code use both but c doesn't,  normalize and
+  // fix..
+  (void) bothclamp;
   for (i = 0; i < height; i++) {
     uint8_t *pos = start + i * pitch;
     char  *ref = (char *)(noise + (rand() & 0xff));  // NOLINT
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index 708f41b87..92f93184a 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -305,15 +305,15 @@ specialize qw/vp9_convolve_avg neon_asm dspr2/, "$sse2_x86inc";
 $vp9_convolve_avg_neon_asm=vp9_convolve_avg_neon;
 
 add_proto qw/void vp9_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2/;
+specialize qw/vp9_convolve8 sse2 ssse3 neon_asm dspr2 avx2/;
 $vp9_convolve8_neon_asm=vp9_convolve8_neon;
 
 add_proto qw/void vp9_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2/;
+specialize qw/vp9_convolve8_horiz sse2 ssse3 neon_asm dspr2 avx2/;
 $vp9_convolve8_horiz_neon_asm=vp9_convolve8_horiz_neon;
 
 add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2/;
+specialize qw/vp9_convolve8_vert sse2 ssse3 neon_asm dspr2 avx2/;
 $vp9_convolve8_vert_neon_asm=vp9_convolve8_vert_neon;
 
 add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
diff --git a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
index d109e136a..3bc7d3918 100644
--- a/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
+++ b/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -307,7 +307,7 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
   __m256i addFilterReg64;
   __m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
   __m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
-  __m256i srcReg32b11, srcReg32b12, srcReg32b13, filtersReg32;
+  __m256i srcReg32b11, srcReg32b12, filtersReg32;
   __m256i firstFilters, secondFilters, thirdFilters, forthFilters;
   unsigned int i;
   unsigned int src_stride, dst_stride;
@@ -409,35 +409,35 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
      // multiply 2 adjacent elements with the filter and add the result
      srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters);
      srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters);
-     srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
-     srcReg32b8 = _mm256_maddubs_epi16(srcReg32b7, forthFilters);
 
      // add and saturate the results together
      srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6);
-     srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b8);
-
 
      // multiply 2 adjacent elements with the filter and add the result
      srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters);
-     srcReg32b6 = _mm256_maddubs_epi16(srcReg32b3, secondFilters);
-
-     // multiply 2 adjacent elements with the filter and add the result
      srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters);
-     srcReg32b13 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters);
-
 
      // add and saturate the results together
      srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
                    _mm256_min_epi16(srcReg32b8, srcReg32b12));
-     srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
-                  _mm256_min_epi16(srcReg32b6, srcReg32b13));
-
-     // add and saturate the results together
      srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
                    _mm256_max_epi16(srcReg32b8, srcReg32b12));
-     srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
-                  _mm256_max_epi16(srcReg32b6, srcReg32b13));
 
+     // multiply 2 adjacent elements with the filter and add the result
+     srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
+     srcReg32b6 = _mm256_maddubs_epi16(srcReg32b7, forthFilters);
+
+     srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b6);
+
+     // multiply 2 adjacent elements with the filter and add the result
+     srcReg32b8 = _mm256_maddubs_epi16(srcReg32b3, secondFilters);
+     srcReg32b12 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters);
+
+     // add and saturate the results together
+     srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
+                  _mm256_min_epi16(srcReg32b8, srcReg32b12));
+     srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
+                  _mm256_max_epi16(srcReg32b8, srcReg32b12));
 
      srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64);
      srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64);
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index a0fff458c..572ab0efc 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -627,11 +627,12 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) {
                        "Width and height beyond allowed size.");
 #endif
   if (cm->width != width || cm->height != height) {
-    const int new_rows = ALIGN_POWER_OF_TWO(height,
-                                            MI_SIZE_LOG2) >> MI_SIZE_LOG2;
-    const int new_cols = ALIGN_POWER_OF_TWO(width,
-                                            MI_SIZE_LOG2) >> MI_SIZE_LOG2;
-    if (calc_mi_size(new_rows) * calc_mi_size(new_cols) > cm->mi_alloc_size) {
+    const int new_mi_rows =
+        calc_mi_size(ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2);
+    const int new_mi_cols =
+        calc_mi_size(ALIGN_POWER_OF_TWO(width,  MI_SIZE_LOG2) >> MI_SIZE_LOG2);
+    if (new_mi_cols > cm->mi_stride ||
+        (new_mi_rows * new_mi_cols > cm->mi_alloc_size)) {
       if (vp9_alloc_context_buffers(cm, width, height))
         vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                            "Failed to allocate context buffers");
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index 1a4155825..e79dcf371 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -314,3 +314,82 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
   vp9_clear_system_state();
   return ret;
 }
+
+// Parses the superframe index, if present, at the end of a compressed chunk.
+//
+//   data, data_sz  The chunk; data_sz must be nonzero (asserted).
+//   sizes          Receives one size per indexed frame (at most 8 entries).
+//   count          Set to the number of entries written to sizes; 0 when the
+//                  chunk has no index or the index is rejected.
+//   decrypt_cb, decrypt_state
+//                  Optional callback (may be NULL) applied to the marker and
+//                  index bytes before they are interpreted.
+//
+// Returns VPX_CODEC_OK when there is no index or it was parsed successfully,
+// and VPX_CODEC_CORRUPT_FRAME when the trailing marker announces an index
+// that is larger than the chunk or whose leading marker byte does not match.
+vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data,
+                                           size_t data_sz,
+                                           uint32_t sizes[8], int *count,
+                                           vpx_decrypt_cb decrypt_cb,
+                                           void *decrypt_state) {
+  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
+  // it is a super frame index. If the last byte of real video compression
+  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
+  // not the associated matching marker byte at the front of the index we have
+  // an invalid bitstream and need to return an error.
+
+  uint8_t marker;
+
+  assert(data_sz);
+  marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1);
+  *count = 0;
+
+  if ((marker & 0xe0) == 0xc0) {
+    const uint32_t frames = (marker & 0x7) + 1;
+    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+    const size_t index_sz = 2 + mag * frames;
+
+    // This chunk is marked as having a superframe index but doesn't have
+    // enough data for it, thus it's an invalid superframe index.
+    if (data_sz < index_sz)
+      return VPX_CODEC_CORRUPT_FRAME;
+
+    {
+      const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state,
+                                          data + data_sz - index_sz);
+
+      // This chunk is marked as having a superframe index but doesn't have
+      // the matching marker byte at the front of the index therefore it's an
+      // invalid chunk.
+      if (marker != marker2)
+        return VPX_CODEC_CORRUPT_FRAME;
+    }
+
+    {
+      // Found a valid superframe index.
+      uint32_t i, j;
+      const uint8_t *x = &data[data_sz - index_sz + 1];
+
+      // Frames has a maximum of 8 and mag has a maximum of 4.
+      uint8_t clear_buffer[32];
+      assert(sizeof(clear_buffer) >= frames * mag);
+      if (decrypt_cb) {
+        decrypt_cb(decrypt_state, x, clear_buffer, frames * mag);
+        x = clear_buffer;
+      }
+
+      for (i = 0; i < frames; ++i) {
+        uint32_t this_sz = 0;
+
+        // Widen each byte to uint32_t before shifting: a promoted int shifted
+        // left by 24 overflows when the byte is 0x80 or larger.
+        for (j = 0; j < mag; ++j)
+          this_sz |= ((uint32_t)(*x++)) << (j * 8);
+        sizes[i] = this_sz;
+      }
+      *count = frames;
+    }
+  }
+  return VPX_CODEC_OK;
+}
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 223b66fc7..848d212e6 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -78,6 +78,28 @@ struct VP9Decoder *vp9_decoder_create();
 
 void vp9_decoder_remove(struct VP9Decoder *pbi);
 
+// Returns the byte at *data, first passing it through decrypt_cb when a
+// decryption callback is supplied.
+static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb,
+                                  void *decrypt_state,
+                                  const uint8_t *data) {
+  uint8_t marker;
+
+  if (!decrypt_cb)
+    return *data;
+
+  decrypt_cb(decrypt_state, data, &marker, 1);
+  return marker;
+}
+
+// Declared here, together with the inline helper read_marker(), so that
+// tests can call it.
+vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data,
+                                           size_t data_sz,
+                                           uint32_t sizes[8], int *count,
+                                           vpx_decrypt_cb decrypt_cb,
+                                           void *decrypt_state);
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c
index 2d5ec79b3..8c13d0da6 100644
--- a/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -28,7 +28,6 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
                           int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                           int zbin_oq_value, uint16_t *eob_ptr,
                           const int16_t *scan, const int16_t *iscan) {
-  int i;
   // TODO(jingning) Decide the need of these arguments after the
   // quantization process is completed.
   (void)zbin_ptr;
@@ -39,7 +38,7 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
   if (!skip_block) {
     // Quantization pass: All coefficients with index >= zero_flag are
     // skippable. Note: zero_flag can be zero.
-
+    int i;
     const int16x8_t v_zero = vdupq_n_s16(0);
     const int16x8_t v_one = vdupq_n_s16(1);
     int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1);
@@ -50,13 +49,37 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
     v_round = vsetq_lane_s16(round_ptr[0], v_round, 0);
     v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0);
     v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0);
-
-    for (i = 0; i < count; i += 8) {
+    // process dc and the first seven ac coeffs
+    {
+      const int16x8_t v_iscan = vld1q_s16(&iscan[0]);
+      const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]);
+      const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
+      const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
+      const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
+                                           vget_low_s16(v_quant));
+      const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
+                                           vget_high_s16(v_quant));
+      const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16),
+                                            vshrn_n_s32(v_tmp_hi, 16));
+      const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
+      const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
+      const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
+      const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
+      const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
+      const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
+      v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
+      vst1q_s16(&qcoeff_ptr[0], v_qcoeff);
+      vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff);
+      v_round = vmovq_n_s16(round_ptr[1]);
+      v_quant = vmovq_n_s16(quant_ptr[1]);
+      v_dequant = vmovq_n_s16(dequant_ptr[1]);
+    }
+    // now process the rest of the ac coeffs
+    for (i = 8; i < count; i += 8) {
       const int16x8_t v_iscan = vld1q_s16(&iscan[i]);
       const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]);
       const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);
-      const int16x8_t v_abs_coeff = vabsq_s16(v_coeff);
-      const int16x8_t v_tmp = vqaddq_s16(v_abs_coeff, v_round);
+      const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);
       const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp),
                                            vget_low_s16(v_quant));
       const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp),
@@ -65,19 +88,13 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count,
                                             vshrn_n_s32(v_tmp_hi, 16));
       const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero);
       const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one);
-      const int16x8_t v_nz_iscan =
-          vandq_s16(vmvnq_s16(vreinterpretq_s16_u16(v_nz_mask)), v_iscan_plus1);
+      const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1);
       const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign);
       const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign);
       const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant);
-
       v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan);
-
       vst1q_s16(&qcoeff_ptr[i], v_qcoeff);
       vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff);
-      v_round = vmovq_n_s16(round_ptr[1]);
-      v_quant = vmovq_n_s16(quant_ptr[1]);
-      v_dequant = vmovq_n_s16(dequant_ptr[1]);
     }
     {
       const int16x4_t v_eobmax_3210 =
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index bdb133882..5c304462a 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -294,6 +294,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
       vp9_write_token(w, vp9_switchable_interp_tree,
                       cm->fc.switchable_interp_prob[ctx],
                       &switchable_interp_encodings[mbmi->interp_filter]);
+      ++cpi->interp_filter_selected[0][mbmi->interp_filter];
     } else {
       assert(mbmi->interp_filter == cm->interp_filter);
     }
@@ -1083,7 +1084,16 @@ static void write_uncompressed_header(VP9_COMP *cpi,
     write_bitdepth_colorspace_sampling(cm, wb);
     write_frame_size(cm, wb);
   } else {
-    if (!cm->show_frame)
+    // In spatial svc if it's not error_resilient_mode then we need to code all
+    // visible frames as invisible. But we need to keep the show_frame flag so
+    // that the publisher could know whether it is supposed to be visible.
+    // So we will code the show_frame flag as it is. Then code the intra_only
+    // bit here. This will make the bitstream incompatible. In the player we
+    // will change to show_frame flag to 0, then add an one byte frame with
+    // show_existing_frame flag which tells the decoder which frame we want to
+    // show.
+    if (!cm->show_frame ||
+        (is_spatial_svc(cpi) && cm->error_resilient_mode == 0))
       vp9_wb_write_bit(wb, cm->intra_only);
 
     if (!cm->error_resilient_mode)
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 90ea9cc25..793a9da07 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -370,8 +370,8 @@ void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
   ctx->newmv_sse = UINT_MAX;
 }
 
-void vp9_denoiser_update_frame_stats(VP9_DENOISER *denoiser, MB_MODE_INFO *mbmi,
-                                     unsigned int sse, PREDICTION_MODE mode,
+void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse,
+                                     PREDICTION_MODE mode,
                                      PICK_MODE_CONTEXT *ctx) {
   // TODO(tkopp): Use both MVs if possible
   if (mbmi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) {
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index d93846ff9..8a91492b7 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -42,7 +42,7 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
 
 void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx);
 
-void vp9_denoiser_update_frame_stats(VP9_DENOISER *denoiser, MB_MODE_INFO *mbmi,
+void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi,
                                      unsigned int sse, PREDICTION_MODE mode,
                                      PICK_MODE_CONTEXT *ctx);
 
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 711354b7f..950a6c8bb 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1232,30 +1232,23 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
   }
 }
 
-static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
+static int is_background(const VP9_COMP *cpi, const TileInfo *const tile,
                          int mi_row, int mi_col) {
-  MACROBLOCK *x = &cpi->mb;
-  uint8_t *src, *pre;
-  int src_stride, pre_stride;
-
+  // This assumes the input source frames are of the same dimension.
   const int row8x8_remaining = tile->mi_row_end - mi_row;
   const int col8x8_remaining = tile->mi_col_end - mi_col;
-
+  const int x = mi_col * MI_SIZE;
+  const int y = mi_row * MI_SIZE;
+  const int src_stride = cpi->Source->y_stride;
+  const uint8_t *const src = &cpi->Source->y_buffer[y * src_stride + x];
+  const int pre_stride = cpi->Last_Source->y_stride;
+  const uint8_t *const pre = &cpi->Last_Source->y_buffer[y * pre_stride + x];
   int this_sad = 0;
   int threshold = 0;
 
-  // This assumes the input source frames are of the same dimension.
-  src_stride = cpi->Source->y_stride;
-  src = cpi->Source->y_buffer + (mi_row * MI_SIZE) * src_stride +
-            (mi_col * MI_SIZE);
-  pre_stride = cpi->Last_Source->y_stride;
-  pre = cpi->Last_Source->y_buffer + (mi_row * MI_SIZE) * pre_stride +
-          (mi_col * MI_SIZE);
-
   if (row8x8_remaining >= MI_BLOCK_SIZE &&
       col8x8_remaining >= MI_BLOCK_SIZE) {
-    this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride,
-                                            pre, pre_stride);
+    this_sad = cpi->fn_ptr[BLOCK_64X64].sdf(src, src_stride, pre, pre_stride);
     threshold = (1 << 12);
   } else {
     int r, c;
@@ -1266,8 +1259,7 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
     threshold = (row8x8_remaining * col8x8_remaining) << 6;
   }
 
-  x->in_static_area = (this_sad < 2 * threshold);
-  return x->in_static_area;
+  return this_sad < 2 * threshold;
 }
 
 static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8,
@@ -3119,7 +3111,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
         break;
       case REFERENCE_PARTITION:
         if (sf->partition_check ||
-            !is_background(cpi, tile, mi_row, mi_col)) {
+            !(x->in_static_area = is_background(cpi, tile, mi_row, mi_col))) {
           set_modeinfo_offsets(cm, xd, mi_row, mi_col);
           auto_partition_range(cpi, tile, mi_row, mi_col,
                                &sf->min_partition_size,
@@ -3297,7 +3289,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
 
   vp9_zero(cm->counts);
   vp9_zero(cpi->coef_counts);
-  vp9_zero(cpi->tx_stepdown_count);
   vp9_zero(rd_opt->comp_pred_diff);
   vp9_zero(rd_opt->filter_diff);
   vp9_zero(rd_opt->tx_select_diff);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 7a9ff5b28..8464882ea 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -131,8 +131,10 @@ static void setup_frame(VP9_COMP *cpi) {
     if (!is_spatial_svc(cpi))
       cpi->refresh_golden_frame = 1;
     cpi->refresh_alt_ref_frame = 1;
+    vp9_zero(cpi->interp_filter_selected);
   } else {
     cm->fc = cm->frame_contexts[cm->frame_context_idx];
+    vp9_zero(cpi->interp_filter_selected[0]);
   }
 }
 
@@ -559,10 +561,8 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
   // Temporal scalability.
   cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
 
-  if ((cpi->svc.number_temporal_layers > 1 &&
-      cpi->oxcf.rc_mode == VPX_CBR) ||
-      (cpi->svc.number_spatial_layers > 1 &&
-      cpi->oxcf.mode == TWO_PASS_SECOND_BEST)) {
+  if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ||
+      (cpi->svc.number_spatial_layers > 1 && cpi->oxcf.pass == 2)) {
     vp9_init_layer_context(cpi);
   }
 
@@ -577,6 +577,30 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
   set_tile_limits(cpi);
 }
 
+// Derives the rate-control buffer levels in *rc from the millisecond values
+// in *oxcf by scaling each with target_bandwidth / 1000. An optimal or
+// maximum value of 0 ms selects target_bandwidth / 8 instead.
+static void set_rc_buffer_sizes(RATE_CONTROL *rc,
+                                const VP9EncoderConfig *oxcf) {
+  const int64_t target = oxcf->target_bandwidth;
+  const int64_t fallback_level = target / 8;
+  const int64_t start_ms = oxcf->starting_buffer_level_ms;
+  const int64_t optimal_ms = oxcf->optimal_buffer_level_ms;
+  const int64_t max_ms = oxcf->maximum_buffer_size_ms;
+
+  rc->starting_buffer_level = start_ms * target / 1000;
+
+  if (optimal_ms == 0)
+    rc->optimal_buffer_level = fallback_level;
+  else
+    rc->optimal_buffer_level = optimal_ms * target / 1000;
+
+  if (max_ms == 0)
+    rc->maximum_buffer_size = fallback_level;
+  else
+    rc->maximum_buffer_size = max_ms * target / 1000;
+}
+
 void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
@@ -610,28 +624,8 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
   }
   cpi->encode_breakout = cpi->oxcf.encode_breakout;
 
-  // local file playback mode == really big buffer
-  if (cpi->oxcf.rc_mode == VPX_VBR) {
-    cpi->oxcf.starting_buffer_level_ms = 60000;
-    cpi->oxcf.optimal_buffer_level_ms = 60000;
-    cpi->oxcf.maximum_buffer_size_ms = 240000;
-  }
-
-  rc->starting_buffer_level = cpi->oxcf.starting_buffer_level_ms *
-                                  cpi->oxcf.target_bandwidth / 1000;
-
-  // Set or reset optimal and maximum buffer levels.
-  if (cpi->oxcf.optimal_buffer_level_ms == 0)
-    rc->optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
-  else
-    rc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level_ms *
-                                   cpi->oxcf.target_bandwidth / 1000;
+  set_rc_buffer_sizes(rc, &cpi->oxcf);
 
-  if (cpi->oxcf.maximum_buffer_size_ms == 0)
-    rc->maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
-  else
-    rc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size_ms *
-                                  cpi->oxcf.target_bandwidth / 1000;
   // Under a configuration change, where maximum_buffer_size may change,
   // keep buffer level clipped to the maximum allowed buffer size.
   rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
@@ -757,11 +751,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
 
   cm->current_video_frame = 0;
-
-  cpi->gold_is_last = 0;
-  cpi->alt_is_last = 0;
-  cpi->gold_is_alt = 0;
-
   cpi->skippable_frame = 0;
 
   // Create the encoder segmentation map and set all entries to 0
@@ -904,8 +893,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
   kf_list = fopen("kf_list.stt", "w");
 #endif
 
-  cpi->output_pkt_list = oxcf->output_pkt_list;
-
   cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
 
   if (oxcf->pass == 1) {
@@ -1571,17 +1558,32 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
 
       ref_cnt_fb(cm->frame_bufs,
                  &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
+      vpx_memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
+                 cpi->interp_filter_selected[0],
+                 sizeof(cpi->interp_filter_selected[0]));
     }
 
     if (cpi->refresh_golden_frame) {
       ref_cnt_fb(cm->frame_bufs,
                  &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx);
+      if (!cpi->rc.is_src_frame_alt_ref)
+        vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
+                   cpi->interp_filter_selected[0],
+                   sizeof(cpi->interp_filter_selected[0]));
+      else
+        vpx_memcpy(cpi->interp_filter_selected[GOLDEN_FRAME],
+                   cpi->interp_filter_selected[ALTREF_FRAME],
+                   sizeof(cpi->interp_filter_selected[ALTREF_FRAME]));
     }
   }
 
   if (cpi->refresh_last_frame) {
     ref_cnt_fb(cm->frame_bufs,
                &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx);
+    if (!cpi->rc.is_src_frame_alt_ref)
+      vpx_memcpy(cpi->interp_filter_selected[LAST_FRAME],
+                 cpi->interp_filter_selected[0],
+                 sizeof(cpi->interp_filter_selected[0]));
   }
 #if CONFIG_VP9_TEMPORAL_DENOISING
   if (cpi->oxcf.noise_sensitivity > 0) {
@@ -1959,36 +1961,27 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
   } while (loop);
 }
 
-static void get_ref_frame_flags(VP9_COMP *cpi) {
-  if (cpi->refresh_last_frame & cpi->refresh_golden_frame)
-    cpi->gold_is_last = 1;
-  else if (cpi->refresh_last_frame ^ cpi->refresh_golden_frame)
-    cpi->gold_is_last = 0;
-
-  if (cpi->refresh_last_frame & cpi->refresh_alt_ref_frame)
-    cpi->alt_is_last = 1;
-  else if (cpi->refresh_last_frame ^ cpi->refresh_alt_ref_frame)
-    cpi->alt_is_last = 0;
-
-  if (cpi->refresh_alt_ref_frame & cpi->refresh_golden_frame)
-    cpi->gold_is_alt = 1;
-  else if (cpi->refresh_alt_ref_frame ^ cpi->refresh_golden_frame)
-    cpi->gold_is_alt = 0;
-
-  cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
-
-  if (cpi->gold_is_last)
-    cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
-
-  if (cpi->rc.frames_till_gf_update_due == INT_MAX &&
-      !is_spatial_svc(cpi))
-    cpi->ref_frame_flags &= ~VP9_GOLD_FLAG;
-
-  if (cpi->alt_is_last)
-    cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
-
-  if (cpi->gold_is_alt)
-    cpi->ref_frame_flags &= ~VP9_ALT_FLAG;
+// Returns a mask of VP9_LAST_FLAG / VP9_GOLD_FLAG / VP9_ALT_FLAG. All three
+// start set; VP9_GOLD_FLAG is cleared when golden and last have the same
+// ref_frame_map value (or frames_till_gf_update_due is INT_MAX outside
+// spatial SVC), and VP9_ALT_FLAG is cleared when alt-ref has the same
+// ref_frame_map value as last or golden.
+static int get_ref_frame_flags(const VP9_COMP *cpi) {
+  const int *const ref_map = cpi->common.ref_frame_map;
+  const int last_buf = ref_map[cpi->lst_fb_idx];
+  const int gold_buf = ref_map[cpi->gld_fb_idx];
+  const int alt_buf = ref_map[cpi->alt_fb_idx];
+  int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
+
+  if ((cpi->rc.frames_till_gf_update_due == INT_MAX &&
+       !is_spatial_svc(cpi)) ||
+      gold_buf == last_buf)
+    flags &= ~VP9_GOLD_FLAG;
+
+  if (alt_buf == last_buf || gold_buf == alt_buf)
+    flags &= ~VP9_ALT_FLAG;
+
+  return flags;
 }
 
 static void set_ext_overrides(VP9_COMP *cpi) {
@@ -2057,19 +2050,91 @@ static void set_arf_sign_bias(VP9_COMP *cpi) {
   cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
 }
 
+// Sets cpi->mv_step_param for the frame about to be encoded. The default
+// is derived from the smaller frame dimension; when the auto_mv_step_size
+// speed feature is on, shown inter frames instead use twice the stored
+// max_mv_magnitude (capped at that default), and max_mv_magnitude is then
+// reset (to the default on intra-only frames, to 0 otherwise).
+static void set_mv_search_params(VP9_COMP *cpi) {
+  const VP9_COMMON *const cm = &cpi->common;
+  const unsigned int max_mv_def = MIN(cm->width, cm->height);
+
+  // Default based on max resolution.
+  cpi->mv_step_param = vp9_init_search_range(max_mv_def);
+
+  if (cpi->sf.mv.auto_mv_step_size) {
+    if (frame_is_intra_only(cm)) {
+      // Initialize max_mv_magnitude for use in the first INTER frame
+      // after a key/intra-only frame.
+      cpi->max_mv_magnitude = max_mv_def;
+    } else {
+      if (cm->show_frame) {
+        // Allow mv_steps to correspond to twice the max mv magnitude found
+        // in the previous frame, capped by the default max_mv_magnitude based
+        // on resolution.
+        cpi->mv_step_param =
+            vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+      }
+      cpi->max_mv_magnitude = 0;
+    }
+  }
+}
+
+// Builds a bitmask with one bit per interpolation filter (bit index is the
+// filter's value) from the selection counts in
+// cpi->interp_filter_selected. A filter's bit is set when LAST_FRAME has
+// recorded selections but none for that filter, and for each of GOLDEN_FRAME
+// and ALTREF_FRAME either nothing was recorded or the filter accounts for
+// fewer than 1 in 50 of the recorded selections. Returns 0 when the last
+// frame type is KEY_FRAME or the alt-ref frame is being refreshed.
+// NOTE(review): made static since the only caller visible here is in this
+// file; confirm no header declares it.
+static int setup_interp_filter_search_mask(const VP9_COMP *cpi) {
+  INTERP_FILTER ifilter;
+  MV_REFERENCE_FRAME ref;
+  int ref_total[MAX_REF_FRAMES] = {0};
+  int mask = 0;
+
+  if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
+    return mask;
+
+  // Total number of selections recorded against each reference frame.
+  for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
+    for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
+      ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];
+  }
+
+  for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
+    const int unused_by_last =
+        ref_total[LAST_FRAME] &&
+        cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0;
+    const int rare_in_golden =
+        ref_total[GOLDEN_FRAME] == 0 ||
+        cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 <
+            ref_total[GOLDEN_FRAME];
+    const int rare_in_altref =
+        ref_total[ALTREF_FRAME] == 0 ||
+        cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 <
+            ref_total[ALTREF_FRAME];
+
+    if (unused_by_last && rare_in_golden && rare_in_altref)
+      mask |= 1 << ifilter;
+  }
+  return mask;
+}
+
 static void encode_frame_to_data_rate(VP9_COMP *cpi,
                                       size_t *size,
                                       uint8_t *dest,
                                       unsigned int *frame_flags) {
   VP9_COMMON *const cm = &cpi->common;
+  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+  struct segmentation *const seg = &cm->seg;
   TX_SIZE t;
   int q;
   int top_index;
   int bottom_index;
 
-  const SPEED_FEATURES *const sf = &cpi->sf;
-  const unsigned int max_mv_def = MIN(cm->width, cm->height);
-  struct segmentation *const seg = &cm->seg;
   set_ext_overrides(cpi);
 
   cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
@@ -2095,24 +2138,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   // Set default state for segment based loop filter update flags.
   cm->lf.mode_ref_delta_update = 0;
 
-  // Initialize cpi->mv_step_param to default based on max resolution.
-  cpi->mv_step_param = vp9_init_search_range(max_mv_def);
-  // Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate.
-  if (sf->mv.auto_mv_step_size) {
-    if (frame_is_intra_only(cm)) {
-      // Initialize max_mv_magnitude for use in the first INTER frame
-      // after a key/intra-only frame.
-      cpi->max_mv_magnitude = max_mv_def;
-    } else {
-      if (cm->show_frame)
-        // Allow mv_steps to correspond to twice the max mv magnitude found
-        // in the previous frame, capped by the default max_mv_magnitude based
-        // on resolution.
-        cpi->mv_step_param = vp9_init_search_range(MIN(max_mv_def, 2 *
-                                 cpi->max_mv_magnitude));
-      cpi->max_mv_magnitude = 0;
-    }
-  }
+  set_mv_search_params(cpi);
+
+  if (cpi->oxcf.pass == 2 &&
+      cpi->sf.adaptive_interp_filter_search)
+    cpi->sf.interp_filter_search_mask =
+        setup_interp_filter_search_mask(cpi);
+
 
   // Set various flags etc to special state if it is a key frame.
   if (frame_is_intra_only(cm)) {
@@ -2128,9 +2160,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     // The alternate reference frame cannot be active for a key frame.
     cpi->rc.source_alt_ref_active = 0;
 
-    cm->error_resilient_mode = (cpi->oxcf.error_resilient_mode != 0);
-    cm->frame_parallel_decoding_mode =
-      (cpi->oxcf.frame_parallel_decoding_mode != 0);
+    cm->error_resilient_mode = oxcf->error_resilient_mode;
 
     // By default, encoder assumes decoder can use prev_mi.
     if (cm->error_resilient_mode) {
@@ -2138,29 +2168,43 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
       cm->reset_frame_context = 0;
       cm->refresh_frame_context = 0;
     } else if (cm->intra_only) {
+      cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
       // Only reset the current context.
       cm->reset_frame_context = 2;
     }
   }
+  if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) {
+    cm->frame_context_idx = cpi->svc.spatial_layer_id;
+
+    // The probs will be updated based on the frame type of its previous
+    // frame if frame_parallel_decoding_mode is 0. The type may vary for
+    // the frame after a key frame in base layer since we may drop enhancement
+    // layers. So set frame_parallel_decoding_mode to 1 in this case.
+    if (cpi->svc.spatial_layer_id == 0 &&
+        cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)
+      cm->frame_parallel_decoding_mode = 1;
+    else
+      cm->frame_parallel_decoding_mode = 0;
+  }
 
   // Configure experimental use of segmentation for enhanced coding of
   // static regions if indicated.
   // Only allowed in second pass of two pass (as requires lagged coding)
   // and if the relevant speed feature flag is set.
-  if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation)
+  if (oxcf->pass == 2 && cpi->sf.static_segmentation)
     configure_static_seg_features(cpi);
 
   // Check if the current frame is skippable for the partition search in the
   // second pass according to the first pass stats
-  if (cpi->oxcf.pass == 2 &&
+  if (oxcf->pass == 2 &&
       (!cpi->use_svc || is_spatial_svc(cpi))) {
     configure_skippable_frame(cpi);
   }
 
   // For 1 pass CBR, check if we are dropping this frame.
   // Never drop on key frame.
-  if (cpi->oxcf.pass == 0 &&
-      cpi->oxcf.rc_mode == VPX_CBR &&
+  if (oxcf->pass == 0 &&
+      oxcf->rc_mode == VPX_CBR &&
       cm->frame_type != KEY_FRAME) {
     if (vp9_rc_drop_frame(cpi)) {
       vp9_rc_postencode_update_drop_frame(cpi);
@@ -2172,9 +2216,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   vp9_clear_system_state();
 
 #if CONFIG_VP9_POSTPROC
-  if (cpi->oxcf.noise_sensitivity > 0) {
+  if (oxcf->noise_sensitivity > 0) {
     int l = 0;
-    switch (cpi->oxcf.noise_sensitivity) {
+    switch (oxcf->noise_sensitivity) {
       case 1:
         l = 20;
         break;
@@ -2215,7 +2259,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
 #ifdef OUTPUT_YUV_DENOISED
-  if (cpi->oxcf.noise_sensitivity > 0) {
+  if (oxcf->noise_sensitivity > 0) {
     vp9_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME],
                             yuv_denoised_file);
   }
@@ -2290,7 +2334,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   else
     cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
 
-  get_ref_frame_flags(cpi);
+  cpi->ref_frame_flags = get_ref_frame_flags(cpi);
 
   cm->last_frame_type = cm->frame_type;
   vp9_rc_postencode_update(cpi, *size);
@@ -2317,8 +2361,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   cm->last_height = cm->height;
 
   // reset to normal state now that we are done.
-  if (!cm->show_existing_frame)
-    cm->last_show_frame = cm->show_frame;
+  if (!cm->show_existing_frame) {
+    if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0)
+      cm->last_show_frame = 0;
+    else
+      cm->last_show_frame = cm->show_frame;
+  }
 
   if (cm->show_frame) {
     vp9_swap_mi_and_prev_mi(cm);
@@ -2329,6 +2377,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     if (cpi->use_svc)
       vp9_inc_frame_in_layer(&cpi->svc);
   }
+
+  if (is_spatial_svc(cpi))
+    cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type =
+        cm->frame_type;
 }
 
 static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
@@ -2443,8 +2495,8 @@ static int frame_is_reference(const VP9_COMP *cpi) {
          cm->seg.update_data;
 }
 
-void adjust_frame_rate(VP9_COMP *cpi) {
-  const struct lookahead_entry *const source = cpi->source;
+void adjust_frame_rate(VP9_COMP *cpi,
+                       const struct lookahead_entry *source) {
   int64_t this_duration;
   int step = 0;
 
@@ -2500,7 +2552,8 @@ static int get_arf_src_index(VP9_COMP *cpi) {
   return arf_src_index;
 }
 
-static void check_src_altref(VP9_COMP *cpi) {
+static void check_src_altref(VP9_COMP *cpi,
+                             const struct lookahead_entry *source) {
   RATE_CONTROL *const rc = &cpi->rc;
 
   if (cpi->oxcf.pass == 2) {
@@ -2509,7 +2562,7 @@ static void check_src_altref(VP9_COMP *cpi) {
       (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE);
   } else {
     rc->is_src_frame_alt_ref = cpi->alt_ref_source &&
-                               (cpi->source == cpi->alt_ref_source);
+                               (source == cpi->alt_ref_source);
   }
 
   if (rc->is_src_frame_alt_ref) {
@@ -2531,6 +2584,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
   RATE_CONTROL *const rc = &cpi->rc;
   struct vpx_usec_timer  cmptimer;
   YV12_BUFFER_CONFIG *force_src_buffer = NULL;
+  struct lookahead_entry *last_source = NULL;
+  struct lookahead_entry *source = NULL;
   MV_REFERENCE_FRAME ref_frame;
   int arf_src_index;
 
@@ -2543,9 +2598,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
   vpx_usec_timer_start(&cmptimer);
 
-  cpi->source = NULL;
-  cpi->last_source = NULL;
-
   vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);
 
   // Normal defaults
@@ -2562,13 +2614,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
 #if CONFIG_SPATIAL_SVC
     if (is_spatial_svc(cpi))
-      cpi->source = vp9_svc_lookahead_peek(cpi, cpi->lookahead,
-                                           arf_src_index, 0);
+      source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, arf_src_index, 0);
     else
 #endif
-      cpi->source = vp9_lookahead_peek(cpi->lookahead, arf_src_index);
-    if (cpi->source != NULL) {
-      cpi->alt_ref_source = cpi->source;
+      source = vp9_lookahead_peek(cpi->lookahead, arf_src_index);
+    if (source != NULL) {
+      cpi->alt_ref_source = source;
 
 #if CONFIG_SPATIAL_SVC
       if (is_spatial_svc(cpi) && cpi->svc.spatial_layer_id > 0) {
@@ -2602,46 +2653,44 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
     }
   }
 
-  if (!cpi->source) {
+  if (!source) {
     // Get last frame source.
     if (cm->current_video_frame > 0) {
 #if CONFIG_SPATIAL_SVC
       if (is_spatial_svc(cpi))
-        cpi->last_source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, -1, 0);
+        last_source = vp9_svc_lookahead_peek(cpi, cpi->lookahead, -1, 0);
       else
 #endif
-        cpi->last_source = vp9_lookahead_peek(cpi->lookahead, -1);
-      if (cpi->last_source == NULL)
+        last_source = vp9_lookahead_peek(cpi->lookahead, -1);
+      if (last_source == NULL)
         return -1;
     }
 
     // Read in the source frame.
 #if CONFIG_SPATIAL_SVC
     if (is_spatial_svc(cpi))
-      cpi->source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
+      source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
     else
 #endif
-      cpi->source = vp9_lookahead_pop(cpi->lookahead, flush);
-    if (cpi->source != NULL) {
+      source = vp9_lookahead_pop(cpi->lookahead, flush);
+    if (source != NULL) {
       cm->show_frame = 1;
       cm->intra_only = 0;
 
       // Check to see if the frame should be encoded as an arf overlay.
-      check_src_altref(cpi);
+      check_src_altref(cpi, source);
     }
   }
 
-  if (cpi->source) {
+  if (source) {
     cpi->un_scaled_source = cpi->Source = force_src_buffer ? force_src_buffer
-                                                           : &cpi->source->img;
+                                                           : &source->img;
 
-    cpi->unscaled_last_source = cpi->last_source != NULL ?
-                                    &cpi->last_source->img : NULL;
+    cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL;
 
-    *time_stamp = cpi->source->ts_start;
-    *time_end = cpi->source->ts_end;
-    *frame_flags =
-        (cpi->source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
+    *time_stamp = source->ts_start;
+    *time_end = source->ts_end;
+    *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
 
   } else {
     *size = 0;
@@ -2652,9 +2701,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
     return -1;
   }
 
-  if (cpi->source->ts_start < cpi->first_time_stamp_ever) {
-    cpi->first_time_stamp_ever = cpi->source->ts_start;
-    cpi->last_end_time_stamp_seen = cpi->source->ts_start;
+  if (source->ts_start < cpi->first_time_stamp_ever) {
+    cpi->first_time_stamp_ever = source->ts_start;
+    cpi->last_end_time_stamp_seen = source->ts_start;
   }
 
   // Clear down mmx registers
@@ -2662,7 +2711,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
 
   // adjust frame rates based on timestamps given
   if (cm->show_frame) {
-    adjust_frame_rate(cpi);
+    adjust_frame_rate(cpi, source);
   }
 
   if (cpi->svc.number_temporal_layers > 1 &&
@@ -2734,7 +2783,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
     const int lossless = is_lossless_requested(oxcf);
     cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4;
     cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
-    vp9_first_pass(cpi);
+    vp9_first_pass(cpi, source);
   } else if (oxcf->pass == 2 &&
       (!cpi->use_svc || is_spatial_svc(cpi))) {
     Pass2Encode(cpi, size, dest, frame_flags);
@@ -2812,12 +2861,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
           cpi->totalp_sq_error += psnr2.sse[0];
           cpi->totalp_samples += psnr2.samples[0];
 
-          frame_ssim2 = vp9_calc_ssim(orig, recon, 1, &weight);
+          frame_ssim2 = vp9_calc_ssim(orig, recon, &weight);
 
           cpi->summed_quality += frame_ssim2 * weight;
           cpi->summed_weights += weight;
 
-          frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, 1, &weight);
+          frame_ssim2 = vp9_calc_ssim(orig, &cm->post_proc_buffer, &weight);
 
           cpi->summedp_quality += frame_ssim2 * weight;
           cpi->summedp_weights += weight;
@@ -2833,6 +2882,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
         }
       }
 
+
       if (cpi->b_calculate_ssimg) {
         double y, u, v, frame_all;
         frame_all = vp9_calc_ssimg(cpi->Source, cm->frame_to_show, &y, &u, &v);
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index e6506a6ed..2dba67c54 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -82,36 +82,19 @@ typedef enum {
 } VPX_SCALING;
 
 typedef enum {
-  // Good Quality Fast Encoding. The encoder balances quality with the
-  // amount of time it takes to encode the output. (speed setting
-  // controls how fast)
-  ONE_PASS_GOOD = 1,
-
-  // One Pass - Best Quality. The encoder places priority on the
-  // quality of the output over encoding speed. The output is compressed
-  // at the highest possible quality. This option takes the longest
-  // amount of time to encode. (speed setting ignored)
-  ONE_PASS_BEST = 2,
-
-  // Two Pass - First Pass. The encoder generates a file of statistics
-  // for use in the second encoding pass. (speed setting controls how fast)
-  TWO_PASS_FIRST = 3,
-
-  // Two Pass - Second Pass. The encoder uses the statistics that were
-  // generated in the first encoding pass to create the compressed
-  // output. (speed setting controls how fast)
-  TWO_PASS_SECOND_GOOD = 4,
-
-  // Two Pass - Second Pass Best.  The encoder uses the statistics that
-  // were generated in the first encoding pass to create the compressed
-  // output using the highest possible quality, and taking a
-  // longer amount of time to encode. (speed setting ignored)
-  TWO_PASS_SECOND_BEST = 5,
-
-  // Realtime/Live Encoding. This mode is optimized for realtime
-  // encoding (for example, capturing a television signal or feed from
-  // a live camera). (speed setting controls how fast)
-  REALTIME = 6,
+  // Good Quality Fast Encoding. The encoder balances quality with the amount of
+  // time it takes to encode the output. Speed setting controls how fast.
+  GOOD,
+
+  // The encoder places priority on the quality of the output over encoding
+  // speed. The output is compressed at the highest possible quality. This
+  // option takes the longest amount of time to encode. Speed setting ignored.
+  BEST,
+
+  // Realtime/Live Encoding. This mode is optimized for realtime encoding (for
+  // example, capturing a television signal or feed from a live camera). Speed
+  // setting controls how fast.
+  REALTIME
 } MODE;
 
 typedef enum {
@@ -225,11 +208,11 @@ typedef struct VP9EncoderConfig {
   int tile_columns;
   int tile_rows;
 
-  struct vpx_fixed_buf         two_pass_stats_in;
-  struct vpx_codec_pkt_list  *output_pkt_list;
+  vpx_fixed_buf_t two_pass_stats_in;
+  struct vpx_codec_pkt_list *output_pkt_list;
 
 #if CONFIG_FP_MB_STATS
-  struct vpx_fixed_buf         firstpass_mb_stats_in;
+  vpx_fixed_buf_t firstpass_mb_stats_in;
 #endif
 
   vp8e_tuning tuning;
@@ -240,19 +223,13 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
   return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
 }
 
-static INLINE int is_best_mode(MODE mode) {
-  return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST;
-}
-
 typedef struct VP9_COMP {
   QUANTS quants;
   MACROBLOCK mb;
   VP9_COMMON common;
   VP9EncoderConfig oxcf;
   struct lookahead_ctx    *lookahead;
-  struct lookahead_entry  *source;
   struct lookahead_entry  *alt_ref_source;
-  struct lookahead_entry  *last_source;
 
   YV12_BUFFER_CONFIG *Source;
   YV12_BUFFER_CONFIG *Last_Source;  // NULL for first frame and alt_ref frames
@@ -261,10 +238,6 @@ typedef struct VP9_COMP {
   YV12_BUFFER_CONFIG *unscaled_last_source;
   YV12_BUFFER_CONFIG scaled_last_source;
 
-  int gold_is_last;  // gold same as last frame ( short circuit gold searches)
-  int alt_is_last;  // Alt same as last ( short circuit altref search)
-  int gold_is_alt;  // don't do both alt and gold search ( just do gold).
-
   int skippable_frame;
 
   int scaled_ref_idx[3];
@@ -312,6 +285,7 @@ typedef struct VP9_COMP {
   double framerate;
 
   vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
+  int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE];
 
   struct vpx_codec_pkt_list  *output_pkt_list;
 
@@ -399,8 +373,6 @@ typedef struct VP9_COMP {
 
   int dummy_packing;    /* flag to indicate if packing is dummy */
 
-  unsigned int tx_stepdown_count[TX_SIZES];
-
   int initial_width;
   int initial_height;
 
@@ -500,14 +472,6 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
       .buf;
 }
 
-// Intra only frames, golden frames (except alt ref overlays) and
-// alt ref frames tend to be coded at a higher than ambient quality
-static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
-  return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
-         (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) ||
-         vp9_is_upper_layer_key_frame(cpi);
-}
-
 static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
   // TODO(JBB): double check we can't exceed this token count if we have a
   // 32x32 transform crossing a boundary at a multiple of 16.
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 295e43777..7867dc244 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -396,7 +396,7 @@ static void set_first_pass_params(VP9_COMP *cpi) {
   cpi->rc.frames_to_key = INT_MAX;
 }
 
-void vp9_first_pass(VP9_COMP *cpi) {
+void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
   int mb_row, mb_col;
   MACROBLOCK *const x = &cpi->mb;
   VP9_COMMON *const cm = &cpi->common;
@@ -428,10 +428,12 @@ void vp9_first_pass(VP9_COMP *cpi) {
   int neutral_count = 0;
   int new_mv_count = 0;
   int sum_in_vectors = 0;
-  uint32_t lastmv_as_int = 0;
+  MV lastmv = {0, 0};
   TWO_PASS *twopass = &cpi->twopass;
   const MV zero_mv = {0, 0};
   const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
+  LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ?
+        &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : NULL;
 
 #if CONFIG_FP_MB_STATS
   if (cpi->use_fp_mb_stats) {
@@ -444,15 +446,14 @@ void vp9_first_pass(VP9_COMP *cpi) {
   set_first_pass_params(cpi);
   vp9_set_quantizer(cm, find_fp_qindex());
 
-  if (is_spatial_svc(cpi)) {
+  if (lc != NULL) {
     MV_REFERENCE_FRAME ref_frame = LAST_FRAME;
     const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL;
-    twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass;
+    twopass = &lc->twopass;
 
     if (cpi->common.current_video_frame == 0) {
       cpi->ref_frame_flags = 0;
     } else {
-      LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
       if (lc->current_video_frame_in_layer == 0)
         cpi->ref_frame_flags = VP9_GOLD_FLAG;
       else
@@ -511,9 +512,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
   vp9_tile_init(&tile, cm, 0, 0);
 
   for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
-    int_mv best_ref_mv;
-
-    best_ref_mv.as_int = 0;
+    MV best_ref_mv = {0, 0};
 
     // Reset above block coeffs.
     xd->up_available = (mb_row != 0);
@@ -593,14 +592,13 @@ void vp9_first_pass(VP9_COMP *cpi) {
       // Other than for the first frame do a motion search.
       if (cm->current_video_frame > 0) {
         int tmp_err, motion_error, raw_motion_error;
-        int_mv mv, tmp_mv;
+        // Assume 0,0 motion with no mv overhead.
+        MV mv = {0, 0}, tmp_mv = {0, 0};
         struct buf_2d unscaled_last_source_buf_2d;
 
         xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
         motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                             &xd->plane[0].pre[0]);
-        // Assume 0,0 motion with no mv overhead.
-        mv.as_int = tmp_mv.as_int = 0;
 
         // Compute the motion error of the 0,0 motion using the last source
         // frame as the reference. Skip the further motion search on
@@ -613,11 +611,10 @@ void vp9_first_pass(VP9_COMP *cpi) {
                                                 &unscaled_last_source_buf_2d);
 
         // TODO(pengchong): Replace the hard-coded threshold
-        if (raw_motion_error > 25 || is_spatial_svc(cpi)) {
+        if (raw_motion_error > 25 || lc != NULL) {
           // Test last reference frame using the previous best mv as the
           // starting point (best reference) for the search.
-          first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &mv.as_mv,
-                                   &motion_error);
+          first_pass_motion_search(cpi, x, &best_ref_mv, &mv, &motion_error);
           if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
             vp9_clear_system_state();
             motion_error = (int)(motion_error * error_weight);
@@ -625,9 +622,9 @@ void vp9_first_pass(VP9_COMP *cpi) {
 
           // If the current best reference mv is not centered on 0,0 then do a
           // 0,0 based search as well.
-          if (best_ref_mv.as_int) {
+          if (!is_zero_mv(&best_ref_mv)) {
             tmp_err = INT_MAX;
-            first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv, &tmp_err);
+            first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &tmp_err);
             if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
               vp9_clear_system_state();
               tmp_err = (int)(tmp_err * error_weight);
@@ -635,7 +632,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
 
             if (tmp_err < motion_error) {
               motion_error = tmp_err;
-              mv.as_int = tmp_mv.as_int;
+              mv = tmp_mv;
             }
           }
 
@@ -648,7 +645,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
             gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                                    &xd->plane[0].pre[0]);
 
-            first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
+            first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv,
                                      &gf_motion_error);
             if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
               vp9_clear_system_state();
@@ -679,7 +676,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
         }
 
         // Start by assuming that intra mode is best.
-        best_ref_mv.as_int = 0;
+        best_ref_mv.row = 0;
+        best_ref_mv.col = 0;
 
 #if CONFIG_FP_MB_STATS
         if (cpi->use_fp_mb_stats) {
@@ -703,25 +701,25 @@ void vp9_first_pass(VP9_COMP *cpi) {
               this_error < 2 * intrapenalty)
             ++neutral_count;
 
-          mv.as_mv.row *= 8;
-          mv.as_mv.col *= 8;
+          mv.row *= 8;
+          mv.col *= 8;
           this_error = motion_error;
           xd->mi[0]->mbmi.mode = NEWMV;
-          xd->mi[0]->mbmi.mv[0] = mv;
+          xd->mi[0]->mbmi.mv[0].as_mv = mv;
           xd->mi[0]->mbmi.tx_size = TX_4X4;
           xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME;
           xd->mi[0]->mbmi.ref_frame[1] = NONE;
           vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize);
           vp9_encode_sby_pass1(x, bsize);
-          sum_mvr += mv.as_mv.row;
-          sum_mvr_abs += abs(mv.as_mv.row);
-          sum_mvc += mv.as_mv.col;
-          sum_mvc_abs += abs(mv.as_mv.col);
-          sum_mvrs += mv.as_mv.row * mv.as_mv.row;
-          sum_mvcs += mv.as_mv.col * mv.as_mv.col;
+          sum_mvr += mv.row;
+          sum_mvr_abs += abs(mv.row);
+          sum_mvc += mv.col;
+          sum_mvc_abs += abs(mv.col);
+          sum_mvrs += mv.row * mv.row;
+          sum_mvcs += mv.col * mv.col;
           ++intercount;
 
-          best_ref_mv.as_int = mv.as_int;
+          best_ref_mv = mv;
 
 #if CONFIG_FP_MB_STATS
           if (cpi->use_fp_mb_stats) {
@@ -739,7 +737,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
           }
 #endif
 
-          if (mv.as_int) {
+          if (!is_zero_mv(&mv)) {
             ++mvcount;
 
 #if CONFIG_FP_MB_STATS
@@ -770,33 +768,33 @@ void vp9_first_pass(VP9_COMP *cpi) {
 #endif
 
             // Non-zero vector, was it different from the last non zero vector?
-            if (mv.as_int != lastmv_as_int)
+            if (!is_equal_mv(&mv, &lastmv))
               ++new_mv_count;
-            lastmv_as_int = mv.as_int;
+            lastmv = mv;
 
             // Does the row vector point inwards or outwards?
             if (mb_row < cm->mb_rows / 2) {
-              if (mv.as_mv.row > 0)
+              if (mv.row > 0)
                 --sum_in_vectors;
-              else if (mv.as_mv.row < 0)
+              else if (mv.row < 0)
                 ++sum_in_vectors;
             } else if (mb_row > cm->mb_rows / 2) {
-              if (mv.as_mv.row > 0)
+              if (mv.row > 0)
                 ++sum_in_vectors;
-              else if (mv.as_mv.row < 0)
+              else if (mv.row < 0)
                 --sum_in_vectors;
             }
 
             // Does the col vector point inwards or outwards?
             if (mb_col < cm->mb_cols / 2) {
-              if (mv.as_mv.col > 0)
+              if (mv.col > 0)
                 --sum_in_vectors;
-              else if (mv.as_mv.col < 0)
+              else if (mv.col < 0)
                 ++sum_in_vectors;
             } else if (mb_col > cm->mb_cols / 2) {
-              if (mv.as_mv.col > 0)
+              if (mv.col > 0)
                 ++sum_in_vectors;
-              else if (mv.as_mv.col < 0)
+              else if (mv.col < 0)
                 --sum_in_vectors;
             }
           }
@@ -864,7 +862,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
     // TODO(paulwilkins):  Handle the case when duration is set to 0, or
     // something less than the full time between subsequent values of
     // cpi->source_time_stamp.
-    fps.duration = (double)(cpi->source->ts_end - cpi->source->ts_start);
+    fps.duration = (double)(source->ts_end - source->ts_start);
 
     // Don't want to do output stats with a stack variable!
     twopass->this_frame_stats = fps;
@@ -895,7 +893,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
 
   vp9_extend_frame_borders(new_yv12);
 
-  if (is_spatial_svc(cpi)) {
+  if (lc != NULL) {
     vp9_update_reference_frames(cpi);
   } else {
     // Swap frame pointers so last frame refers to the frame we just compressed.
@@ -1081,8 +1079,7 @@ static double get_prediction_decay_rate(const VP9_COMMON *cm,
 
 // This function gives an estimate of how badly we believe the prediction
 // quality is decaying from frame to frame.
-static double get_zero_motion_factor(const VP9_COMMON *cm,
-                                     const FIRSTPASS_STATS *frame) {
+static double get_zero_motion_factor(const FIRSTPASS_STATS *frame) {
   const double sr_ratio = frame->coded_error /
                           DOUBLE_DIVIDE_CHECK(frame->sr_coded_error);
   const double zero_motion_pct = frame->pcnt_inter -
@@ -1095,12 +1092,10 @@ static double get_zero_motion_factor(const VP9_COMMON *cm,
 // Function to test for a condition where a complex transition is followed
 // by a static section. For example in slide shows where there is a fade
 // between slides. This is to help with more optimal kf and gf positioning.
-static int detect_transition_to_still(TWO_PASS *twopass,
+static int detect_transition_to_still(const TWO_PASS *twopass,
                                       int frame_interval, int still_interval,
                                       double loop_decay_rate,
                                       double last_decay_rate) {
-  int trans_to_still = 0;
-
   // Break clause to detect very still sections after motion
   // For example a static image after a fade or other transition
   // instead of a clean scene cut.
@@ -1108,26 +1103,22 @@ static int detect_transition_to_still(TWO_PASS *twopass,
       loop_decay_rate >= 0.999 &&
       last_decay_rate < 0.9) {
     int j;
-    const FIRSTPASS_STATS *position = twopass->stats_in;
-    FIRSTPASS_STATS tmp_next_frame;
 
     // Look ahead a few frames to see if static condition persists...
     for (j = 0; j < still_interval; ++j) {
-      if (EOF == input_stats(twopass, &tmp_next_frame))
+      const FIRSTPASS_STATS *stats = &twopass->stats_in[j];
+      if (stats >= twopass->stats_in_end)
         break;
 
-      if (tmp_next_frame.pcnt_inter - tmp_next_frame.pcnt_motion < 0.999)
+      if (stats->pcnt_inter - stats->pcnt_motion < 0.999)
         break;
     }
 
-    reset_fpf_position(twopass, position);
-
     // Only if it does do we signal a transition to still.
-    if (j == still_interval)
-      trans_to_still = 1;
+    return j == still_interval;
   }
 
-  return trans_to_still;
+  return 0;
 }
 
 // This function detects a flash through the high relative pcnt_second_ref
@@ -1373,7 +1364,8 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
                                    double group_error, int gf_arf_bits) {
   RATE_CONTROL *const rc = &cpi->rc;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
-  TWO_PASS *twopass = &cpi->twopass;
+  TWO_PASS *const twopass = &cpi->twopass;
+  GF_GROUP *const gf_group = &twopass->gf_group;
   FIRSTPASS_STATS frame_stats;
   int i;
   int frame_index = 1;
@@ -1396,17 +1388,17 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
   // is also the golden frame.
   if (!key_frame) {
     if (rc->source_alt_ref_active) {
-      twopass->gf_group.update_type[0] = OVERLAY_UPDATE;
-      twopass->gf_group.rf_level[0] = INTER_NORMAL;
-      twopass->gf_group.bit_allocation[0] = 0;
-      twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0];
-      twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0];
+      gf_group->update_type[0] = OVERLAY_UPDATE;
+      gf_group->rf_level[0] = INTER_NORMAL;
+      gf_group->bit_allocation[0] = 0;
+      gf_group->arf_update_idx[0] = arf_buffer_indices[0];
+      gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
     } else {
-      twopass->gf_group.update_type[0] = GF_UPDATE;
-      twopass->gf_group.rf_level[0] = GF_ARF_STD;
-      twopass->gf_group.bit_allocation[0] = gf_arf_bits;
-      twopass->gf_group.arf_update_idx[0] = arf_buffer_indices[0];
-      twopass->gf_group.arf_ref_idx[0] = arf_buffer_indices[0];
+      gf_group->update_type[0] = GF_UPDATE;
+      gf_group->rf_level[0] = GF_ARF_STD;
+      gf_group->bit_allocation[0] = gf_arf_bits;
+      gf_group->arf_update_idx[0] = arf_buffer_indices[0];
+      gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
     }
 
     // Step over the golden frame / overlay frame
@@ -1421,25 +1413,25 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
 
   // Store the bits to spend on the ARF if there is one.
   if (rc->source_alt_ref_pending) {
-    twopass->gf_group.update_type[frame_index] = ARF_UPDATE;
-    twopass->gf_group.rf_level[frame_index] = GF_ARF_STD;
-    twopass->gf_group.bit_allocation[frame_index] = gf_arf_bits;
-    twopass->gf_group.arf_src_offset[frame_index] =
+    gf_group->update_type[frame_index] = ARF_UPDATE;
+    gf_group->rf_level[frame_index] = GF_ARF_STD;
+    gf_group->bit_allocation[frame_index] = gf_arf_bits;
+    gf_group->arf_src_offset[frame_index] =
       (unsigned char)(rc->baseline_gf_interval - 1);
-    twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0];
-    twopass->gf_group.arf_ref_idx[frame_index] =
+    gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+    gf_group->arf_ref_idx[frame_index] =
       arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
                          rc->source_alt_ref_active];
     ++frame_index;
 
     if (cpi->multi_arf_enabled) {
       // Set aside a slot for a level 1 arf.
-      twopass->gf_group.update_type[frame_index] = ARF_UPDATE;
-      twopass->gf_group.rf_level[frame_index] = GF_ARF_LOW;
-      twopass->gf_group.arf_src_offset[frame_index] =
+      gf_group->update_type[frame_index] = ARF_UPDATE;
+      gf_group->rf_level[frame_index] = GF_ARF_LOW;
+      gf_group->arf_src_offset[frame_index] =
         (unsigned char)((rc->baseline_gf_interval >> 1) - 1);
-      twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[1];
-      twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0];
+      gf_group->arf_update_idx[frame_index] = arf_buffer_indices[1];
+      gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
       ++frame_index;
     }
   }
@@ -1469,16 +1461,16 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
       if (frame_index <= mid_frame_idx)
         arf_idx = 1;
     }
-    twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
-    twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
+    gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
+    gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
 
     target_frame_size = clamp(target_frame_size, 0,
                               MIN(max_bits, (int)total_group_bits));
 
-    twopass->gf_group.update_type[frame_index] = LF_UPDATE;
-    twopass->gf_group.rf_level[frame_index] = INTER_NORMAL;
+    gf_group->update_type[frame_index] = LF_UPDATE;
+    gf_group->rf_level[frame_index] = INTER_NORMAL;
 
-    twopass->gf_group.bit_allocation[frame_index] = target_frame_size;
+    gf_group->bit_allocation[frame_index] = target_frame_size;
     ++frame_index;
   }
 
@@ -1486,23 +1478,23 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
   // We need to configure the frame at the end of the sequence + 1 that will be
   // the start frame for the next group. Otherwise prior to the call to
   // vp9_rc_get_second_pass_params() the data will be undefined.
-  twopass->gf_group.arf_update_idx[frame_index] = arf_buffer_indices[0];
-  twopass->gf_group.arf_ref_idx[frame_index] = arf_buffer_indices[0];
+  gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
+  gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
 
   if (rc->source_alt_ref_pending) {
-    twopass->gf_group.update_type[frame_index] = OVERLAY_UPDATE;
-    twopass->gf_group.rf_level[frame_index] = INTER_NORMAL;
+    gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+    gf_group->rf_level[frame_index] = INTER_NORMAL;
 
     // Final setup for second arf and its overlay.
     if (cpi->multi_arf_enabled) {
-      twopass->gf_group.bit_allocation[2] =
-        twopass->gf_group.bit_allocation[mid_frame_idx] + mid_boost_bits;
-      twopass->gf_group.update_type[mid_frame_idx] = OVERLAY_UPDATE;
-      twopass->gf_group.bit_allocation[mid_frame_idx] = 0;
+      gf_group->bit_allocation[2] =
+          gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits;
+      gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE;
+      gf_group->bit_allocation[mid_frame_idx] = 0;
     }
   } else {
-    twopass->gf_group.update_type[frame_index] = GF_UPDATE;
-    twopass->gf_group.rf_level[frame_index] = GF_ARF_STD;
+    gf_group->update_type[frame_index] = GF_UPDATE;
+    gf_group->rf_level[frame_index] = GF_ARF_STD;
   }
 
   // Note whether multi-arf was enabled this group for next time.
@@ -1554,8 +1546,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   vp9_clear_system_state();
   vp9_zero(next_frame);
 
-  gf_group_bits = 0;
-
   // Load stats for the current frame.
   mod_frame_err = calculate_modified_err(twopass, oxcf, this_frame);
 
@@ -1615,9 +1605,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
       decay_accumulator = decay_accumulator * loop_decay_rate;
 
       // Monitor for static sections.
-      zero_motion_accumulator =
-        MIN(zero_motion_accumulator,
-            get_zero_motion_factor(&cpi->common, &next_frame));
+      zero_motion_accumulator = MIN(zero_motion_accumulator,
+                                    get_zero_motion_factor(&next_frame));
 
       // Break clause to detect very still sections after motion. For example,
       // a static image after a fade or other transition.
@@ -1831,6 +1820,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   int i, j;
   RATE_CONTROL *const rc = &cpi->rc;
   TWO_PASS *const twopass = &cpi->twopass;
+  GF_GROUP *const gf_group = &twopass->gf_group;
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
   const FIRSTPASS_STATS first_frame = *this_frame;
   const FIRSTPASS_STATS *const start_position = twopass->stats_in;
@@ -1849,7 +1839,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   cpi->common.frame_type = KEY_FRAME;
 
   // Reset the GF group data structures.
-  vp9_zero(twopass->gf_group);
+  vp9_zero(*gf_group);
 
   // Is this a forced key frame by interval.
   rc->this_key_frame_forced = rc->next_key_frame_forced;
@@ -1987,9 +1977,8 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
       break;
 
     // Monitor for static sections.
-    zero_motion_accumulator =
-      MIN(zero_motion_accumulator,
-          get_zero_motion_factor(&cpi->common, &next_frame));
+    zero_motion_accumulator = MIN(zero_motion_accumulator,
+                                  get_zero_motion_factor(&next_frame));
 
     // For the first few frames collect data to decide kf boost.
     if (i <= (rc->max_gf_interval * 2)) {
@@ -2040,9 +2029,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   twopass->kf_group_bits -= kf_bits;
 
   // Save the bits to spend on the key frame.
-  twopass->gf_group.bit_allocation[0] = kf_bits;
-  twopass->gf_group.update_type[0] = KF_UPDATE;
-  twopass->gf_group.rf_level[0] = KF_STD;
+  gf_group->bit_allocation[0] = kf_bits;
+  gf_group->update_type[0] = KF_UPDATE;
+  gf_group->rf_level[0] = KF_STD;
 
   // Note the total error score of the kf group minus the key frame itself.
   twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err);
@@ -2119,15 +2108,16 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
   TWO_PASS *const twopass = &cpi->twopass;
+  GF_GROUP *const gf_group = &twopass->gf_group;
   int frames_left;
   FIRSTPASS_STATS this_frame;
   FIRSTPASS_STATS this_frame_copy;
 
   int target_rate;
-  LAYER_CONTEXT *lc = NULL;
+  LAYER_CONTEXT *const lc = is_spatial_svc(cpi) ?
+        &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : NULL;
 
-  if (is_spatial_svc(cpi)) {
-    lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+  if (lc != NULL) {
     frames_left = (int)(twopass->total_stats.count -
                   lc->current_video_frame_in_layer);
   } else {
@@ -2140,10 +2130,10 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
 
   // If this is an arf frame then we dont want to read the stats file or
   // advance the input pointer as we already have what we need.
-  if (twopass->gf_group.update_type[twopass->gf_group.index] == ARF_UPDATE) {
+  if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
     int target_rate;
     configure_buffer_updates(cpi);
-    target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index];
+    target_rate = gf_group->bit_allocation[gf_group->index];
     target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
     rc->base_frame_target = target_rate;
 
@@ -2154,7 +2144,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     vp9_rc_set_frame_target(cpi, target_rate);
     cm->frame_type = INTER_FRAME;
 
-    if (is_spatial_svc(cpi)) {
+    if (lc != NULL) {
       if (cpi->svc.spatial_layer_id == 0) {
         lc->is_key_frame = 0;
       } else {
@@ -2170,7 +2160,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
 
   vp9_clear_system_state();
 
-  if (is_spatial_svc(cpi) && twopass->kf_intra_err_min == 0) {
+  if (lc != NULL && twopass->kf_intra_err_min == 0) {
     twopass->kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs;
     twopass->gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs;
   }
@@ -2178,8 +2168,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   if (cpi->oxcf.rc_mode == VPX_Q) {
     twopass->active_worst_quality = cpi->oxcf.cq_level;
   } else if (cm->current_video_frame == 0 ||
-             (is_spatial_svc(cpi) &&
-              lc->current_video_frame_in_layer == 0)) {
+             (lc != NULL && lc->current_video_frame_in_layer == 0)) {
     // Special case code for first frame.
     const int section_target_bandwidth = (int)(twopass->bits_left /
                                                frames_left);
@@ -2205,7 +2194,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     cm->frame_type = INTER_FRAME;
   }
 
-  if (is_spatial_svc(cpi)) {
+  if (lc != NULL) {
     if (cpi->svc.spatial_layer_id == 0) {
       lc->is_key_frame = (cm->frame_type == KEY_FRAME);
       if (lc->is_key_frame)
@@ -2236,13 +2225,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     }
 
     rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-    if (!is_spatial_svc(cpi))
+    if (lc == NULL)  // lc is non-NULL exactly when is_spatial_svc(cpi) holds.
       cpi->refresh_golden_frame = 1;
   }
 
   configure_buffer_updates(cpi);
 
-  target_rate = twopass->gf_group.bit_allocation[twopass->gf_group.index];
+  target_rate = gf_group->bit_allocation[gf_group->index];
   if (cpi->common.frame_type == KEY_FRAME)
     target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
   else
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index bf8c9fd96..aaa6b0311 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -121,7 +121,7 @@ struct VP9_COMP;
 
 void vp9_init_first_pass(struct VP9_COMP *cpi);
 void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi);
-void vp9_first_pass(struct VP9_COMP *cpi);
+void vp9_first_pass(struct VP9_COMP *cpi, const struct lookahead_entry *source);
 void vp9_end_first_pass(struct VP9_COMP *cpi);
 
 void vp9_init_second_pass(struct VP9_COMP *cpi);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 6115f5a0f..7a7bb2824 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -17,6 +17,7 @@
 
 #include "vpx_mem/vpx_mem.h"
 
+#include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_mvref_common.h"
 #include "vp9/common/vp9_reconinter.h"
@@ -253,7 +254,8 @@ static int get_pred_buffer(PRED_BUFFER *p, int len) {
 }
 
 static void free_pred_buffer(PRED_BUFFER *p) {
-  p->in_use = 0;
+  if (p != NULL)
+    p->in_use = 0;
 }
 
 static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
@@ -343,6 +345,52 @@ static void encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
   }
 }
 
+struct estimate_block_intra_args {  // Passed as arg to estimate_block_intra.
+  VP9_COMP *cpi;
+  MACROBLOCK *x;
+  PREDICTION_MODE mode;  // Intra mode handed to vp9_predict_intra_block().
+  int rate;  // Running sum of the per-block rate from model_rd_for_sb_y().
+  int64_t dist;  // Running sum of the per-block distortion.
+};
+// Per-block callback: predicts one transform block, accumulates its model RD.
+static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
+                                 TX_SIZE tx_size, void *arg) {
+  struct estimate_block_intra_args* const args = arg;
+  VP9_COMP *const cpi = args->cpi;
+  MACROBLOCK *const x = args->x;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  struct macroblock_plane *const p = &x->plane[0];
+  struct macroblockd_plane *const pd = &xd->plane[0];
+  const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size];
+  uint8_t *const src_buf_base = p->src.buf;
+  uint8_t *const dst_buf_base = pd->dst.buf;
+  const int src_stride = p->src.stride;
+  const int dst_stride = pd->dst.stride;
+  int i, j;
+  int rate;
+  int64_t dist;
+  unsigned int var_y, sse_y;
+  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
+  assert(plane == 0);  // Buffers below are hard-wired to plane[0].
+  (void) plane;  // plane is otherwise unused when assert() compiles away.
+
+  p->src.buf = &src_buf_base[4 * (j * src_stride + i)];  // i, j in 4-px units.
+  pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
+  // Use source buffer as an approximation for the fully reconstructed buffer.
+  vp9_predict_intra_block(xd, block >> (2 * tx_size),
+                          b_width_log2(plane_bsize),
+                          tx_size, args->mode,
+                          p->src.buf, src_stride,
+                          pd->dst.buf, dst_stride,
+                          i, j, 0);
+  // This procedure assumes zero offset from p->src.buf and pd->dst.buf.
+  model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y);
+  p->src.buf = src_buf_base;  // Undo the per-block offset applied above.
+  pd->dst.buf = dst_buf_base;
+  args->rate += rate;  // Add this block's estimate to the totals.
+  args->dist += dist;
+}
+
 static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][4] = {
   {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV},
   {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG},
@@ -360,7 +408,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                             PICK_MODE_CONTEXT *ctx) {
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
-  struct macroblock_plane *const p = &x->plane[0];
   struct macroblockd_plane *const pd = &xd->plane[0];
   PREDICTION_MODE this_mode, best_mode = ZEROMV;
   MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
@@ -397,9 +444,9 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       (((mi_row + mi_col) >> bsl) +
        get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
   int const_motion[MAX_REF_FRAMES] = { 0 };
-  int bh = num_4x4_blocks_high_lookup[bsize] << 2;
-  int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
-  int pixels_in_block = bh * bw;
+  const int bh = num_4x4_blocks_high_lookup[bsize] << 2;
+  const int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
+  const int pixels_in_block = bh * bw;
   // For speed 6, the result of interp filter is reused later in actual encoding
   // process.
   // tmp[3] points to dst buffer, and the other 3 point to allocated buffers.
@@ -609,8 +656,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
       if (cpi->oxcf.noise_sensitivity > 0) {
-        vp9_denoiser_update_frame_stats(&cpi->denoiser, mbmi, sse_y,
-                                        this_mode, ctx);
+        vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx);
       }
 #endif
 
@@ -625,8 +671,7 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         skip_txfm = x->skip_txfm[0];
 
         if (cpi->sf.reuse_inter_pred_sby) {
-          if (best_pred != NULL)
-            free_pred_buffer(best_pred);
+          free_pred_buffer(best_pred);
 
           best_pred = this_mode_pred;
         }
@@ -646,7 +691,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 
   // If best prediction is not in dst buf, then copy the prediction block from
   // temp buf to dst buf.
-  if (cpi->sf.reuse_inter_pred_sby && best_pred->data != orig_dst.buf) {
+  if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby &&
+      best_pred->data != orig_dst.buf) {
     uint8_t *copy_from, *copy_to;
 
     pd->dst = orig_dst;
@@ -670,20 +716,11 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   // threshold.
   if (!x->skip && best_rd > inter_mode_thresh &&
       bsize <= cpi->sf.max_intra_bsize) {
-    int i, j;
-    const int width  = num_4x4_blocks_wide_lookup[bsize];
-    const int height = num_4x4_blocks_high_lookup[bsize];
+    struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
 
-    int rate2 = 0;
-    int64_t dist2 = 0;
-    const int dst_stride = cpi->sf.reuse_inter_pred_sby ? bw : pd->dst.stride;
-    const int src_stride = p->src.stride;
-    int block_idx = 0;
-
-    TX_SIZE tmp_tx_size = MIN(max_txsize_lookup[bsize],
-                              tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
-    const BLOCK_SIZE bsize_tx = txsize_to_bsize[tmp_tx_size];
-    const int step = 1 << tmp_tx_size;
+    const TX_SIZE intra_tx_size =
+        MIN(max_txsize_lookup[bsize],
+            tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
 
     if (cpi->sf.reuse_inter_pred_sby) {
       pd->dst.buf = tmp[0].data;
@@ -691,44 +728,26 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
     }
 
     for (this_mode = DC_PRED; this_mode <= DC_PRED; ++this_mode) {
-      uint8_t *const src_buf_base = p->src.buf;
-      uint8_t *const dst_buf_base = pd->dst.buf;
-      for (j = 0; j < height; j += step) {
-        for (i = 0; i < width; i += step) {
-          p->src.buf = &src_buf_base[4 * (j * src_stride + i)];
-          pd->dst.buf = &dst_buf_base[4 * (j * dst_stride + i)];
-          // Use source buffer as an approximation for the fully reconstructed
-          // buffer
-          vp9_predict_intra_block(xd, block_idx, b_width_log2(bsize),
-                                  tmp_tx_size, this_mode,
-                                  p->src.buf, src_stride,
-                                  pd->dst.buf, dst_stride,
-                                  i, j, 0);
-          model_rd_for_sb_y(cpi, bsize_tx, x, xd, &rate, &dist, &var_y, &sse_y);
-          rate2 += rate;
-          dist2 += dist;
-          ++block_idx;
-        }
-      }
-      p->src.buf = src_buf_base;
-      pd->dst.buf = dst_buf_base;
-
-      rate = rate2;
-      dist = dist2;
-
+      const TX_SIZE saved_tx_size = mbmi->tx_size;
+      args.mode = this_mode;
+      args.rate = 0;
+      args.dist = 0;
+      mbmi->tx_size = intra_tx_size;
+      vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
+                                             estimate_block_intra, &args);
+      mbmi->tx_size = saved_tx_size;
+      rate = args.rate;
+      dist = args.dist;
       rate += cpi->mbmode_cost[this_mode];
       rate += intra_cost_penalty;
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
 
-      if (cpi->sf.reuse_inter_pred_sby)
-        pd->dst = orig_dst;
-
       if (this_rd + intra_mode_cost < best_rd) {
         best_rd = this_rd;
         *returnrate = rate;
         *returndistortion = dist;
         mbmi->mode = this_mode;
-        mbmi->tx_size = tmp_tx_size;
+        mbmi->tx_size = intra_tx_size;
         mbmi->ref_frame[0] = INTRA_FRAME;
         mbmi->uv_mode = this_mode;
         mbmi->mv[0].as_int = INVALID_MV;
@@ -736,6 +755,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         x->skip_txfm[0] = skip_txfm;
       }
     }
+    if (cpi->sf.reuse_inter_pred_sby)
+      pd->dst = orig_dst;
   }
 
   return INT64_MAX;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 9da2adec4..b926a58f4 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -646,7 +646,6 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
   int q;
 
   if (frame_is_intra_only(cm)) {
-    active_best_quality = rc->best_quality;
 
     // Handle the special case for key frames forced when we have reached
     // the maximum key frame interval. Here force the Q to a range
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 4fc3e9e08..2841efabe 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -462,7 +462,7 @@ void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
 
   // Set baseline threshold values.
   for (i = 0; i < MAX_MODES; ++i)
-    rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
+    rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
 
   rd->thresh_mult[THR_NEARESTMV] = 0;
   rd->thresh_mult[THR_NEARESTG] = 0;
@@ -548,7 +548,7 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
   int i;
 
   for (i = 0; i < MAX_REFS; ++i)
-    rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode)  ? -500 : 0;
+    rd->thresh_mult_sub8x8[i] = cpi->oxcf.mode == BEST ? -500 : 0;
 
   rd->thresh_mult_sub8x8[THR_LAST] += 2500;
   rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 053dbbbd0..546bc8e97 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -486,7 +486,6 @@ static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
   txfm_rd_in_plane(x, rate, distortion, skip,
                    sse, ref_best_rd, 0, bs,
                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
-  cpi->tx_stepdown_count[0]++;
 }
 
 static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
@@ -508,24 +507,24 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                              {INT64_MAX, INT64_MAX},
                              {INT64_MAX, INT64_MAX},
                              {INT64_MAX, INT64_MAX}};
-  TX_SIZE n, m;
+  int n, m;
   int s0, s1;
   const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
   int64_t best_rd = INT64_MAX;
-  TX_SIZE best_tx = TX_4X4;
+  TX_SIZE best_tx = max_tx_size;
 
   const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
   assert(skip_prob > 0);
   s0 = vp9_cost_bit(skip_prob, 0);
   s1 = vp9_cost_bit(skip_prob, 1);
 
-  for (n = TX_4X4; n <= max_tx_size; n++) {
+  for (n = max_tx_size; n >= 0;  n--) {
     txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                      &sse[n], ref_best_rd, 0, bs, n,
                      cpi->sf.use_fast_coef_costing);
     r[n][1] = r[n][0];
     if (r[n][0] < INT_MAX) {
-      for (m = 0; m <= n - (n == max_tx_size); m++) {
+      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
         if (m == n)
           r[n][1] += vp9_cost_zero(tx_probs[m]);
         else
@@ -541,6 +540,13 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
       rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
     }
 
+    // Early termination in transform size search.
+    if (cpi->sf.tx_size_search_breakout &&
+        (rd[n][1] == INT64_MAX ||
+        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
+        s[n] == 1))
+      break;
+
     if (rd[n][1] < best_rd) {
       best_tx = n;
       best_rd = rd[n][1];
@@ -562,16 +568,12 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
 
   if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
     tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
-    cpi->tx_stepdown_count[0]++;
   } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
     tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
-    cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
   } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
     tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
-    cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
   } else {
     tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
-    cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
   }
 }
 
@@ -1382,7 +1384,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
           if (best_rd < label_mv_thresh)
             break;
 
-          if (!is_best_mode(cpi->oxcf.mode)) {
+          if (cpi->oxcf.mode != BEST) {
             // use previous block's result as next block's MV predictor.
             if (i > 0) {
               bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
@@ -1424,7 +1426,7 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                           INT_MAX, 1);
 
           // Should we do a full search (best quality only)
-          if (is_best_mode(cpi->oxcf.mode)) {
+          if (cpi->oxcf.mode == BEST) {
             int_mv *const best_mv = &mi->bmi[i].as_mv[0];
             /* Check if mvp_full is within the range. */
             clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
@@ -2278,6 +2280,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         } else {
           int rate_sum = 0;
           int64_t dist_sum = 0;
+          if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
+              (cpi->sf.interp_filter_search_mask & (1 << i))) {
+            rate_sum = INT_MAX;
+            dist_sum = INT64_MAX;
+            continue;
+          }
+
           if ((cm->interp_filter == SWITCHABLE &&
                (!i || best_needs_copy)) ||
               (cm->interp_filter != SWITCHABLE &&
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index a2f4583a5..879c83c08 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -50,8 +50,20 @@ enum {
                               (1 << THR_GOLD)
 };
 
+// Boosted (usually coded above ambient quality): intra-only, alt-ref, golden
+// frames (except alt-ref overlays), and vp9_is_upper_layer_key_frame() frames.
+static int frame_is_boosted(const VP9_COMP *cpi) {
+  return frame_is_intra_only(&cpi->common) ||
+         cpi->refresh_alt_ref_frame ||
+         (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) ||
+         vp9_is_upper_layer_key_frame(cpi);
+}
+
+
 static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
                                    SPEED_FEATURES *sf, int speed) {
+  const int boosted = frame_is_boosted(cpi);
+
   sf->adaptive_rd_thresh = 1;
   sf->recode_loop = (speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW;
   sf->allow_skip_recode = 1;
@@ -59,8 +71,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
   if (speed >= 1) {
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check  = 1;
-    sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
-                                                      : USE_LARGESTALL;
 
     if (MIN(cm->width, cm->height) >= 720)
       sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -80,9 +90,14 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
+
+    sf->tx_size_search_breakout = 1;
   }
 
   if (speed >= 2) {
+    sf->tx_size_search_method = boosted ? USE_FULL_RD
+                                        : USE_LARGESTALL;
+
     if (MIN(cm->width, cm->height) >= 720) {
       sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
       sf->last_partitioning_redo_frequency = 3;
@@ -117,10 +132,10 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
       sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
     }
     sf->adaptive_pred_interp_filter = 0;
-    sf->cb_partition_search = frame_is_boosted(cpi) ? 0 : 1;
+    sf->cb_partition_search = !boosted;
     sf->cb_pred_filter_search = 1;
     sf->alt_ref_search_fp = 1;
-    sf->motion_field_mode_search = frame_is_boosted(cpi) ? 0 : 1;
+    sf->motion_field_mode_search = !boosted;
     sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
     sf->last_partitioning_redo_frequency = 3;
     sf->recode_loop = ALLOW_RECODE_KFMAXBW;
@@ -128,6 +143,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->mode_skip_start = 6;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
+    sf->adaptive_interp_filter_search = 1;
   }
 
   if (speed >= 4) {
@@ -165,8 +181,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
 static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
                                  int speed, vp9e_tune_content content) {
   VP9_COMMON *const cm = &cpi->common;
-  const int frames_since_key =
-      cm->frame_type == KEY_FRAME ? 0 : cpi->rc.frames_since_key;
+  const int is_keyframe = cm->frame_type == KEY_FRAME;
+  const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key;
   sf->static_segmentation = 0;
   sf->adaptive_rd_thresh = 1;
   sf->use_fast_coef_costing = 1;
@@ -262,17 +278,16 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
   }
 
   if (speed >= 5) {
-    sf->use_quant_fp = cm->frame_type == KEY_FRAME ? 0 : 1;
-    sf->auto_min_max_partition_size = (cm->frame_type == KEY_FRAME) ?
-        RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX;
+    sf->use_quant_fp = !is_keyframe;
+    sf->auto_min_max_partition_size = is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX
+                                                  : STRICT_NEIGHBORING_MIN_MAX;
     sf->max_partition_size = BLOCK_32X32;
     sf->min_partition_size = BLOCK_8X8;
     sf->partition_check =
         (frames_since_key % sf->last_partitioning_redo_frequency == 1);
-    sf->force_frame_boost = cm->frame_type == KEY_FRAME ||
-        (frames_since_key %
-            (sf->last_partitioning_redo_frequency << 1) == 1);
-    sf->max_delta_qindex = (cm->frame_type == KEY_FRAME) ? 20 : 15;
+    sf->force_frame_boost = is_keyframe ||
+        (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
+    sf->max_delta_qindex = is_keyframe ? 20 : 15;
     sf->partition_search_type = REFERENCE_PARTITION;
     sf->use_nonrd_pick_mode = 1;
     sf->allow_skip_recode = 0;
@@ -290,8 +305,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
     sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
     sf->search_type_check_frequency = 50;
 
-    sf->tx_size_search_method = (cm->frame_type == KEY_FRAME) ?
-        USE_LARGESTALL : USE_TX_8X8;
+    sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
 
     // This feature is only enabled when partition search is disabled.
     sf->reuse_inter_pred_sby = 1;
@@ -301,6 +315,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
 
     sf->mv.reduce_first_step_size = 1;
   }
+
   if (speed >= 7) {
     sf->mv.search_method = FAST_DIAMOND;
     sf->mv.fullpel_search_step_param = 10;
@@ -309,10 +324,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
         800 : 300;
     sf->elevate_newmv_thresh = 2500;
   }
+
   if (speed >= 12) {
     sf->elevate_newmv_thresh = 4000;
     sf->mv.subpel_force_stop = 2;
   }
+
   if (speed >= 13) {
     int i;
     sf->max_intra_bsize = BLOCK_32X32;
@@ -365,6 +382,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->force_frame_boost = 0;
   sf->max_delta_qindex = 0;
   sf->disable_filter_search_var_thresh = 0;
+  sf->adaptive_interp_filter_search = 0;
+
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;
     sf->intra_uv_mode_mask[i] = INTRA_ALL;
@@ -391,17 +410,16 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   // Recode loop tolerence %.
   sf->recode_tolerance = 25;
   sf->default_interp_filter = SWITCHABLE;
+  sf->tx_size_search_breakout = 0;
 
-  if (oxcf->mode == REALTIME) {
+  if (oxcf->mode == REALTIME)
     set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content);
-  } else {
-    if (!is_best_mode(oxcf->mode))
-      set_good_speed_feature(cpi, cm, sf, oxcf->speed);
-  }
+  else if (oxcf->mode == GOOD)
+    set_good_speed_feature(cpi, cm, sf, oxcf->speed);
 
   cpi->full_search_sad = vp9_full_search_sad;
-  cpi->diamond_search_sad = is_best_mode(oxcf->mode) ? vp9_full_range_search
-                                                     : vp9_diamond_search_sad;
+  cpi->diamond_search_sad = oxcf->mode == BEST ? vp9_full_range_search
+                                               : vp9_diamond_search_sad;
   cpi->refining_search_sad = vp9_refining_search_sad;
 
 
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 3e8215f0c..e2e5c1e99 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -103,6 +103,12 @@ typedef enum {
 } MODE_SEARCH_SKIP_LOGIC;
 
 typedef enum {
+  FLAG_SKIP_EIGHTTAP = 1 << EIGHTTAP,
+  FLAG_SKIP_EIGHTTAP_SMOOTH = 1 << EIGHTTAP_SMOOTH,
+  FLAG_SKIP_EIGHTTAP_SHARP = 1 << EIGHTTAP_SHARP,
+} INTERP_FILTER_MASK;
+
+typedef enum {
   // Search partitions using RD/NONRD criterion
   SEARCH_PARTITION = 0,
 
@@ -376,6 +382,16 @@ typedef struct SPEED_FEATURES {
 
   // default interp filter choice
   INTERP_FILTER default_interp_filter;
+
+  // Early termination in transform size search, which only applies while
+  // tx_size_search_method is USE_FULL_RD.
+  int tx_size_search_breakout;
+
+  // adaptive interp_filter search to allow skip of certain filter types.
+  int adaptive_interp_filter_search;
+
+  // mask for skip evaluation of certain interp_filter type.
+  INTERP_FILTER_MASK interp_filter_search_mask;
 } SPEED_FEATURES;
 
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_ssim.c b/vp9/encoder/vp9_ssim.c
index 026e6a8fd..8435640c8 100644
--- a/vp9/encoder/vp9_ssim.c
+++ b/vp9/encoder/vp9_ssim.c
@@ -95,7 +95,7 @@ double vp9_ssim2(uint8_t *img1, uint8_t *img2, int stride_img1,
   return ssim_total;
 }
 double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
-                     int lumamask, double *weight) {
+                     double *weight) {
   double a, b, c;
   double ssimv;
 
diff --git a/vp9/encoder/vp9_ssim.h b/vp9/encoder/vp9_ssim.h
index a581c2c23..d1dd1b722 100644
--- a/vp9/encoder/vp9_ssim.h
+++ b/vp9/encoder/vp9_ssim.h
@@ -18,7 +18,7 @@ extern "C" {
 #include "vpx_scale/yv12config.h"
 
 double vp9_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
-                     int lumamask, double *weight);
+                     double *weight);
 
 double vp9_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
                       double *ssim_y, double *ssim_u, double *ssim_v);
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index fb52d1ab7..9bd9792bd 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -36,6 +36,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
     int i;
     lc->current_video_frame_in_layer = 0;
     lc->layer_size = 0;
+    lc->last_frame_type = FRAME_TYPES;
     lrc->ni_av_qi = oxcf->worst_allowed_q;
     lrc->total_actual_bits = 0;
     lrc->total_target_vs_actual = 0;
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 801449b6f..d475d5fcd 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -25,9 +25,10 @@ typedef struct {
   double framerate;
   int avg_frame_size;
   TWO_PASS twopass;
-  struct vpx_fixed_buf rc_twopass_stats_in;
+  vpx_fixed_buf_t rc_twopass_stats_in;
   unsigned int current_video_frame_in_layer;
   int is_key_frame;
+  FRAME_TYPE last_frame_type;
   vpx_svc_parameters_t svc_params_received;
   struct lookahead_entry  *alt_ref_source;
   int alt_ref_idx;
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index cdbb69b2d..5ff02d882 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -21,7 +21,6 @@
 #include "vp9/vp9_iface_common.h"
 
 struct vp9_extracfg {
-  struct vpx_codec_pkt_list *pkt_list;
   int                         cpu_used;  // available cpu percentage in 1/16
   unsigned int                enable_auto_alt_ref;
   unsigned int                noise_sensitivity;
@@ -43,37 +42,26 @@ struct vp9_extracfg {
   vp9e_tune_content           content;
 };
 
-struct extraconfig_map {
-  unsigned int usage;
-  struct vp9_extracfg cfg;
-};
-
-static const struct extraconfig_map extracfg_map[] = {
-  {
-    0,
-    { // NOLINT
-      NULL,
-      0,                          // cpu_used
-      1,                          // enable_auto_alt_ref
-      0,                          // noise_sensitivity
-      0,                          // sharpness
-      0,                          // static_thresh
-      0,                          // tile_columns
-      0,                          // tile_rows
-      7,                          // arnr_max_frames
-      5,                          // arnr_strength
-      3,                          // arnr_type
-      VP8_TUNE_PSNR,              // tuning
-      10,                         // cq_level
-      0,                          // rc_max_intra_bitrate_pct
-      0,                          // lossless
-      0,                          // frame_parallel_decoding_mode
-      NO_AQ,                      // aq_mode
-      0,                          // frame_periodic_delta_q
-      BITS_8,                     // Bit depth
-      VP9E_CONTENT_DEFAULT        // content
-    }
-  }
+static const struct vp9_extracfg default_extra_cfg = {  // Init-time defaults.
+  0,                          // cpu_used
+  1,                          // enable_auto_alt_ref
+  0,                          // noise_sensitivity
+  0,                          // sharpness
+  0,                          // static_thresh
+  0,                          // tile_columns
+  0,                          // tile_rows
+  7,                          // arnr_max_frames
+  5,                          // arnr_strength
+  3,                          // arnr_type
+  VP8_TUNE_PSNR,              // tuning
+  10,                         // cq_level
+  0,                          // rc_max_intra_bitrate_pct
+  0,                          // lossless
+  0,                          // frame_parallel_decoding_mode
+  NO_AQ,                      // aq_mode
+  0,                          // frame_periodic_delta_q
+  BITS_8,                     // Bit depth
+  VP9E_CONTENT_DEFAULT        // content
 };
 
 struct vpx_codec_alg_priv {
@@ -188,6 +176,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
     if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)
       ERROR("Not enough ref buffers for svc alt ref frames");
   }
+  if (cfg->ss_number_layers > 3 && cfg->g_error_resilient == 0)
+    ERROR("Multiple frame contexts are not supported for more than 3 layers");
 #endif
 
   RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
@@ -325,6 +315,7 @@ static vpx_codec_err_t set_encoder_config(
     VP9EncoderConfig *oxcf,
     const vpx_codec_enc_cfg_t *cfg,
     const struct vp9_extracfg *extra_cfg) {
+  const int is_vbr = cfg->rc_end_usage == VPX_VBR;
   oxcf->profile = cfg->g_profile;
   oxcf->width   = cfg->g_w;
   oxcf->height  = cfg->g_h;
@@ -334,17 +325,16 @@ static vpx_codec_err_t set_encoder_config(
   if (oxcf->init_framerate > 180)
     oxcf->init_framerate = 30;
 
+  oxcf->mode = GOOD;
+
   switch (cfg->g_pass) {
     case VPX_RC_ONE_PASS:
-      oxcf->mode = ONE_PASS_GOOD;
       oxcf->pass = 0;
       break;
     case VPX_RC_FIRST_PASS:
-      oxcf->mode = TWO_PASS_FIRST;
       oxcf->pass = 1;
       break;
     case VPX_RC_LAST_PASS:
-      oxcf->mode = TWO_PASS_SECOND_BEST;
       oxcf->pass = 2;
       break;
   }
@@ -371,9 +361,9 @@ static vpx_codec_err_t set_encoder_config(
   oxcf->scaled_frame_width       = cfg->rc_scaled_width;
   oxcf->scaled_frame_height      = cfg->rc_scaled_height;
 
-  oxcf->maximum_buffer_size_ms   = cfg->rc_buf_sz;
-  oxcf->starting_buffer_level_ms = cfg->rc_buf_initial_sz;
-  oxcf->optimal_buffer_level_ms  = cfg->rc_buf_optimal_sz;
+  oxcf->maximum_buffer_size_ms   = is_vbr ? 240000 : cfg->rc_buf_sz;
+  oxcf->starting_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_initial_sz;
+  oxcf->optimal_buffer_level_ms  = is_vbr ? 60000 : cfg->rc_buf_optimal_sz;
 
   oxcf->drop_frames_water_mark   = cfg->rc_dropframe_thresh;
 
@@ -393,7 +383,6 @@ static vpx_codec_err_t set_encoder_config(
   oxcf->sharpness              =  extra_cfg->sharpness;
 
   oxcf->two_pass_stats_in      =  cfg->rc_twopass_stats_in;
-  oxcf->output_pkt_list        =  extra_cfg->pkt_list;
 
 #if CONFIG_FP_MB_STATS
   oxcf->firstpass_mb_stats_in  = cfg->rc_firstpass_mb_stats_in;
@@ -659,8 +648,6 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
   (void)data;
 
   if (ctx->priv == NULL) {
-    int i;
-    vpx_codec_enc_cfg_t *cfg;
     struct vpx_codec_alg_priv *priv = calloc(1, sizeof(*priv));
 
     if (priv == NULL)
@@ -668,7 +655,6 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
 
     ctx->priv = &priv->base;
     ctx->priv->sz = sizeof(*ctx->priv);
-    ctx->priv->iface = ctx->iface;
     ctx->priv->alg_priv = priv;
     ctx->priv->init_flags = ctx->init_flags;
     ctx->priv->enc.total_encoders = 1;
@@ -679,17 +665,7 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
       ctx->config.enc = &ctx->priv->alg_priv->cfg;
     }
 
-    cfg = &ctx->priv->alg_priv->cfg;
-
-    // Select the extra vp6 configuration table based on the current
-    // usage value. If the current usage value isn't found, use the
-    // values for usage case 0.
-    for (i = 0;
-         extracfg_map[i].usage && extracfg_map[i].usage != cfg->g_usage;
-         ++i) {}
-
-    priv->extra_cfg = extracfg_map[i].cfg;
-    priv->extra_cfg.pkt_list = &priv->pkt_list.head;
+    priv->extra_cfg = default_extra_cfg;
 
     vp9_initialize_enc();
 
@@ -701,10 +677,12 @@ static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx,
                          &ctx->priv->alg_priv->cfg,
                          &ctx->priv->alg_priv->extra_cfg);
       cpi = vp9_create_compressor(&ctx->priv->alg_priv->oxcf);
-      if (cpi == NULL)
+      if (cpi == NULL) {
         res = VPX_CODEC_MEM_ERROR;
-      else
+      } else {
+        cpi->output_pkt_list = &priv->pkt_list.head;
         ctx->priv->alg_priv->cpi = cpi;
+      }
     }
   }
 
@@ -718,31 +696,36 @@ static vpx_codec_err_t encoder_destroy(vpx_codec_alg_priv_t *ctx) {
   return VPX_CODEC_OK;
 }
 
-static void pick_quickcompress_mode(vpx_codec_alg_priv_t  *ctx,
+static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
                                     unsigned long duration,
                                     unsigned long deadline) {
-  // Use best quality mode if no deadline is given.
-  MODE new_qc = ONE_PASS_BEST;
-
-  if (deadline) {
-    // Convert duration parameter from stream timebase to microseconds
-    const uint64_t duration_us = (uint64_t)duration * 1000000 *
-                               (uint64_t)ctx->cfg.g_timebase.num /
-                               (uint64_t)ctx->cfg.g_timebase.den;
-
-    // If the deadline is more that the duration this frame is to be shown,
-    // use good quality mode. Otherwise use realtime mode.
-    new_qc = (deadline > duration_us) ? ONE_PASS_GOOD : REALTIME;
-  }
+  MODE new_mode = BEST;  // Default; kept as-is for VPX_RC_FIRST_PASS.
 
-  if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
-    new_qc = TWO_PASS_FIRST;
-  else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS)
-    new_qc = (new_qc == ONE_PASS_BEST) ? TWO_PASS_SECOND_BEST
-                                          : TWO_PASS_SECOND_GOOD;
+  switch (ctx->cfg.g_pass) {
+    case VPX_RC_ONE_PASS:
+      if (deadline > 0) {
+        const vpx_codec_enc_cfg_t *const cfg = &ctx->cfg;
+
+        // Convert duration from stream timebase units to microseconds.
+        const uint64_t duration_us = (uint64_t)duration * 1000000 *
+           (uint64_t)cfg->g_timebase.num /(uint64_t)cfg->g_timebase.den;
+
+        // If the deadline is more than the duration this frame is to be shown,
+        // use good quality mode. Otherwise use realtime mode.
+        new_mode = (deadline > duration_us) ? GOOD : REALTIME;
+      } else {
+        new_mode = BEST;  // No deadline given: use best quality.
+      }
+      break;
+    case VPX_RC_FIRST_PASS:  // Keeps the initial BEST mode.
+      break;
+    case VPX_RC_LAST_PASS:
+      new_mode = deadline > 0 ? GOOD : BEST;  // Never REALTIME here.
+      break;
+  }
 
-  if (ctx->oxcf.mode != new_qc) {
-    ctx->oxcf.mode = new_qc;
+  if (ctx->oxcf.mode != new_mode) {  // Reconfigure only when mode changes.
+    ctx->oxcf.mode = new_mode;
     vp9_change_config(ctx->cpi, &ctx->oxcf);
   }
 }
@@ -929,7 +912,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
 #endif
 
         // Pack invisible frames with the next visible frame
-        if (cpi->common.show_frame == 0
+        if (!cpi->common.show_frame
 #if CONFIG_SPATIAL_SVC
             || (is_spatial_svc(cpi) &&
                 cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
@@ -961,18 +944,6 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
             )
           pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
 
-        if (cpi->common.show_frame == 0) {
-          pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE;
-
-          // This timestamp should be as close as possible to the
-          // prior PTS so that if a decoder uses pts to schedule when
-          // to do this, we start right after last frame was decoded.
-          // Invisible frames have no duration.
-          pkt.data.frame.pts =
-              ticks_to_timebase_units(timebase, cpi->last_time_stamp_seen) + 1;
-          pkt.data.frame.duration = 0;
-        }
-
         if (cpi->droppable)
           pkt.data.frame.flags |= VPX_FRAME_IS_DROPPABLE;
 
@@ -997,8 +968,9 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t  *ctx,
         cx_data_sz -= size;
 #if CONFIG_SPATIAL_SVC
         if (is_spatial_svc(cpi)) {
-          vpx_codec_cx_pkt_t pkt = {0};
+          vpx_codec_cx_pkt_t pkt;
           int i;
+          vp9_zero(pkt);
           pkt.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES;
           for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
             pkt.data.layer_sizes[i] = cpi->svc.layer_context[i].layer_size;
@@ -1350,11 +1322,11 @@ CODEC_INTERFACE(vpx_codec_vp9_cx) = {
   encoder_destroy,    // vpx_codec_destroy_fn_t
   encoder_ctrl_maps,  // vpx_codec_ctrl_fn_map_t
   {  // NOLINT
-    NOT_IMPLEMENTED,  // vpx_codec_peek_si_fn_t
-    NOT_IMPLEMENTED,  // vpx_codec_get_si_fn_t
-    NOT_IMPLEMENTED,  // vpx_codec_decode_fn_t
-    NOT_IMPLEMENTED,  // vpx_codec_frame_get_fn_t
-    NOT_IMPLEMENTED   // vpx_codec_set_fb_fn_t
+    NULL,  // vpx_codec_peek_si_fn_t
+    NULL,  // vpx_codec_get_si_fn_t
+    NULL,  // vpx_codec_decode_fn_t
+    NULL,  // vpx_codec_frame_get_fn_t
+    NULL   // vpx_codec_set_fb_fn_t
   },
   {  // NOLINT
     1,                      // 1 cfg map
@@ -1362,8 +1334,8 @@ CODEC_INTERFACE(vpx_codec_vp9_cx) = {
     encoder_encode,         // vpx_codec_encode_fn_t
     encoder_get_cxdata,     // vpx_codec_get_cx_data_fn_t
     encoder_set_config,     // vpx_codec_enc_config_set_fn_t
-    NOT_IMPLEMENTED,        // vpx_codec_get_global_headers_fn_t
+    NULL,        // vpx_codec_get_global_headers_fn_t
     encoder_get_preview,    // vpx_codec_get_preview_frame_fn_t
-    NOT_IMPLEMENTED         // vpx_codec_enc_mr_get_mem_loc_fn_t
+    NULL         // vpx_codec_enc_mr_get_mem_loc_fn_t
   }
 };
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index 4372ac9e5..05d61c8a0 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -66,7 +66,6 @@ static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx,
 
     ctx->priv = (vpx_codec_priv_t *)alg_priv;
     ctx->priv->sz = sizeof(*ctx->priv);
-    ctx->priv->iface = ctx->iface;
     ctx->priv->alg_priv = alg_priv;
     ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si);
     ctx->priv->init_flags = ctx->init_flags;
@@ -332,81 +331,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
   return VPX_CODEC_OK;
 }
 
-static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb,
-                                  void *decrypt_state,
-                                  const uint8_t *data) {
-  if (decrypt_cb) {
-    uint8_t marker;
-    decrypt_cb(decrypt_state, data, &marker, 1);
-    return marker;
-  }
-  return *data;
-}
-
-static vpx_codec_err_t parse_superframe_index(const uint8_t *data,
-                                              size_t data_sz,
-                                              uint32_t sizes[8], int *count,
-                                              vpx_decrypt_cb decrypt_cb,
-                                              void *decrypt_state) {
-  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
-  // it is a super frame index. If the last byte of real video compression
-  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
-  // not the associated matching marker byte at the front of the index we have
-  // an invalid bitstream and need to return an error.
-
-  uint8_t marker;
-
-  assert(data_sz);
-  marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1);
-  *count = 0;
-
-  if ((marker & 0xe0) == 0xc0) {
-    const uint32_t frames = (marker & 0x7) + 1;
-    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
-    const size_t index_sz = 2 + mag * frames;
-
-    // This chunk is marked as having a superframe index but doesn't have
-    // enough data for it, thus it's an invalid superframe index.
-    if (data_sz < index_sz)
-      return VPX_CODEC_CORRUPT_FRAME;
-
-    {
-      const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state,
-                                          data + data_sz - index_sz);
-
-      // This chunk is marked as having a superframe index but doesn't have
-      // the matching marker byte at the front of the index therefore it's an
-      // invalid chunk.
-      if (marker != marker2)
-        return VPX_CODEC_CORRUPT_FRAME;
-    }
-
-    {
-      // Found a valid superframe index.
-      uint32_t i, j;
-      const uint8_t *x = &data[data_sz - index_sz + 1];
-
-      // Frames has a maximum of 8 and mag has a maximum of 4.
-      uint8_t clear_buffer[32];
-      assert(sizeof(clear_buffer) >= frames * mag);
-      if (decrypt_cb) {
-        decrypt_cb(decrypt_state, x, clear_buffer, frames * mag);
-        x = clear_buffer;
-      }
-
-      for (i = 0; i < frames; ++i) {
-        uint32_t this_sz = 0;
-
-        for (j = 0; j < mag; ++j)
-          this_sz |= (*x++) << (j * 8);
-        sizes[i] = this_sz;
-      }
-      *count = frames;
-    }
-  }
-  return VPX_CODEC_OK;
-}
-
 static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx,
                                       const uint8_t *data, unsigned int data_sz,
                                       void *user_priv, long deadline) {
@@ -424,8 +348,8 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx,
   // Reset flushed when receiving a valid frame.
   ctx->flushed = 0;
 
-  res = parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
-                               ctx->decrypt_cb, ctx->decrypt_state);
+  res = vp9_parse_superframe_index(data, data_sz, frame_sizes, &frame_count,
+                                   ctx->decrypt_cb, ctx->decrypt_state);
   if (res != VPX_CODEC_OK)
     return res;
 
@@ -723,12 +647,12 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = {
   },
   { // NOLINT
     0,
-    NOT_IMPLEMENTED,  // vpx_codec_enc_cfg_map_t
-    NOT_IMPLEMENTED,  // vpx_codec_encode_fn_t
-    NOT_IMPLEMENTED,  // vpx_codec_get_cx_data_fn_t
-    NOT_IMPLEMENTED,  // vpx_codec_enc_config_set_fn_t
-    NOT_IMPLEMENTED,  // vpx_codec_get_global_headers_fn_t
-    NOT_IMPLEMENTED,  // vpx_codec_get_preview_frame_fn_t
-    NOT_IMPLEMENTED   // vpx_codec_enc_mr_get_mem_loc_fn_t
+    NULL,  // vpx_codec_enc_cfg_map_t
+    NULL,  // vpx_codec_encode_fn_t
+    NULL,  // vpx_codec_get_cx_data_fn_t
+    NULL,  // vpx_codec_enc_config_set_fn_t
+    NULL,  // vpx_codec_get_global_headers_fn_t
+    NULL,  // vpx_codec_get_preview_frame_fn_t
+    NULL   // vpx_codec_enc_mr_get_mem_loc_fn_t
   }
 };