17 files changed, 894 insertions, 775 deletions
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index a840b480a..a9d168cc8 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -18,8 +18,6 @@
 #include "vp9/common/vp9_idct.h"
 #include "vp9/common/vp9_systemdependent.h"
 
-#include "vp9/encoder/vp9_dct.h"
-
 static INLINE int fdct_round_shift(int input) {
   int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
   assert(INT16_MIN <= rv && rv <= INT16_MAX);
@@ -157,32 +155,36 @@ static const transform_2d FHT_4[] = {
   { fadst4, fadst4 }   // ADST_ADST = 3
 };
 
-void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,
-                        int stride, int tx_type) {
-  int16_t out[4 * 4];
-  int16_t *outptr = &out[0];
-  int i, j;
-  int16_t temp_in[4], temp_out[4];
-  const transform_2d ht = FHT_4[tx_type];
+void vp9_fht4x4_c(const int16_t *input, int16_t *output,
+                  int stride, int tx_type) {
+  if (tx_type == DCT_DCT) {
+    vp9_fdct4x4_c(input, output, stride);
+  } else {
+    int16_t out[4 * 4];
+    int16_t *outptr = &out[0];
+    int i, j;
+    int16_t temp_in[4], temp_out[4];
+    const transform_2d ht = FHT_4[tx_type];
 
-  // Columns
-  for (i = 0; i < 4; ++i) {
-    for (j = 0; j < 4; ++j)
-      temp_in[j] = input[j * stride + i] * 16;
-    if (i == 0 && temp_in[0])
-      temp_in[0] += 1;
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < 4; ++j)
-      outptr[j * 4 + i] = temp_out[j];
-  }
+    // Columns
+    for (i = 0; i < 4; ++i) {
+      for (j = 0; j < 4; ++j)
+        temp_in[j] = input[j * stride + i] * 16;
+      if (i == 0 && temp_in[0])
+        temp_in[0] += 1;
+      ht.cols(temp_in, temp_out);
+      for (j = 0; j < 4; ++j)
+        outptr[j * 4 + i] = temp_out[j];
+    }
 
-  // Rows
-  for (i = 0; i < 4; ++i) {
-    for (j = 0; j < 4; ++j)
-      temp_in[j] = out[j + i * 4];
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < 4; ++j)
-      output[j + i * 4] = (temp_out[j] + 1) >> 2;
+    // Rows
+    for (i = 0; i < 4; ++i) {
+      for (j = 0; j < 4; ++j)
+        temp_in[j] = out[j + i * 4];
+      ht.rows(temp_in, temp_out);
+      for (j = 0; j < 4; ++j)
+        output[j + i * 4] = (temp_out[j] + 1) >> 2;
+    }
   }
 }
 
@@ -565,30 +567,34 @@ static const transform_2d FHT_8[] = {
   { fadst8, fadst8 }   // ADST_ADST = 3
 };
 
-void vp9_short_fht8x8_c(const int16_t *input, int16_t *output,
-                        int stride, int tx_type) {
-  int16_t out[64];
-  int16_t *outptr = &out[0];
-  int i, j;
-  int16_t temp_in[8], temp_out[8];
-  const transform_2d ht = FHT_8[tx_type];
-
-  // Columns
-  for (i = 0; i < 8; ++i) {
-    for (j = 0; j < 8; ++j)
-      temp_in[j] = input[j * stride + i] * 4;
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < 8; ++j)
-      outptr[j * 8 + i] = temp_out[j];
-  }
+void vp9_fht8x8_c(const int16_t *input, int16_t *output,
+                  int stride, int tx_type) {
+  if (tx_type == DCT_DCT) {
+    vp9_fdct8x8_c(input, output, stride);
+  } else {
+    int16_t out[64];
+    int16_t *outptr = &out[0];
+    int i, j;
+    int16_t temp_in[8], temp_out[8];
+    const transform_2d ht = FHT_8[tx_type];
+
+    // Columns
+    for (i = 0; i < 8; ++i) {
+      for (j = 0; j < 8; ++j)
+        temp_in[j] = input[j * stride + i] * 4;
+      ht.cols(temp_in, temp_out);
+      for (j = 0; j < 8; ++j)
+        outptr[j * 8 + i] = temp_out[j];
+    }
 
-  // Rows
-  for (i = 0; i < 8; ++i) {
-    for (j = 0; j < 8; ++j)
-      temp_in[j] = out[j + i * 8];
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < 8; ++j)
-      output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+    // Rows
+    for (i = 0; i < 8; ++i) {
+      for (j = 0; j < 8; ++j)
+        temp_in[j] = out[j + i * 8];
+      ht.rows(temp_in, temp_out);
+      for (j = 0; j < 8; ++j)
+        output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1;
+    }
   }
 }
 
@@ -958,31 +964,34 @@ static const transform_2d FHT_16[] = {
   { fadst16, fadst16 }   // ADST_ADST = 3
 };
 
-void vp9_short_fht16x16_c(const int16_t *input, int16_t *output,
-                          int stride, int tx_type) {
-  int16_t out[256];
-  int16_t *outptr = &out[0];
-  int i, j;
-  int16_t temp_in[16], temp_out[16];
-  const transform_2d ht = FHT_16[tx_type];
-
-  // Columns
-  for (i = 0; i < 16; ++i) {
-    for (j = 0; j < 16; ++j)
-      temp_in[j] = input[j * stride + i] * 4;
-    ht.cols(temp_in, temp_out);
-    for (j = 0; j < 16; ++j)
-      outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
-//      outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
-  }
+void vp9_fht16x16_c(const int16_t *input, int16_t *output,
+                    int stride, int tx_type) {
+  if (tx_type == DCT_DCT) {
+    vp9_fdct16x16_c(input, output, stride);
+  } else {
+    int16_t out[256];
+    int16_t *outptr = &out[0];
+    int i, j;
+    int16_t temp_in[16], temp_out[16];
+    const transform_2d ht = FHT_16[tx_type];
+
+    // Columns
+    for (i = 0; i < 16; ++i) {
+      for (j = 0; j < 16; ++j)
+        temp_in[j] = input[j * stride + i] * 4;
+      ht.cols(temp_in, temp_out);
+      for (j = 0; j < 16; ++j)
+        outptr[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2;
+    }
 
-  // Rows
-  for (i = 0; i < 16; ++i) {
-    for (j = 0; j < 16; ++j)
-      temp_in[j] = out[j + i * 16];
-    ht.rows(temp_in, temp_out);
-    for (j = 0; j < 16; ++j)
-      output[j + i * 16] = temp_out[j];
+    // Rows
+    for (i = 0; i < 16; ++i) {
+      for (j = 0; j < 16; ++j)
+        temp_in[j] = out[j + i * 16];
+      ht.rows(temp_in, temp_out);
+      for (j = 0; j < 16; ++j)
+        output[j + i * 16] = temp_out[j];
+    }
   }
 }
 
@@ -1375,27 +1384,3 @@ void vp9_fdct32x32_rd_c(const int16_t *input, int16_t *out, int stride) {
       out[j + i * 32] = temp_out[j];
   }
 }
-
-void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
-                int stride) {
-  if (tx_type == DCT_DCT)
-    vp9_fdct4x4(input, output, stride);
-  else
-    vp9_short_fht4x4(input, output, stride, tx_type);
-}
-
-void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
-                int stride) {
-  if (tx_type == DCT_DCT)
-    vp9_fdct8x8(input, output, stride);
-  else
-    vp9_short_fht8x8(input, output, stride, tx_type);
-}
-
-void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
-                  int stride) {
-  if (tx_type == DCT_DCT)
-    vp9_fdct16x16(input, output, stride);
-  else
-    vp9_short_fht16x16(input, output, stride, tx_type);
-}
diff --git a/vp9/encoder/vp9_dct.h b/vp9/encoder/vp9_dct.h
deleted file mode 100644
index cf5f001a9..000000000
--- a/vp9/encoder/vp9_dct.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_VP9_DCT_H_
-#define VP9_ENCODER_VP9_DCT_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void vp9_fht4x4(TX_TYPE tx_type, const int16_t *input, int16_t *output,
-                int stride);
-
-void vp9_fht8x8(TX_TYPE tx_type, const int16_t *input, int16_t *output,
-                int stride);
-
-void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
-                  int stride);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  // VP9_ENCODER_VP9_DCT_H_
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index eaedf1e07..442170f0c 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -319,7 +319,7 @@ static void build_activity_map(VP9_COMP *cpi) {
 }
 
 // Macroblock activity masking
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
+static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
 #if USE_ACT_INDEX
   x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
   x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
@@ -491,24 +491,26 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
   } else {
     // Note how often each mode chosen as best
     cpi->mode_chosen_counts[mb_mode_index]++;
-    if (is_inter_block(mbmi) &&
-        (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
-      int_mv best_mv[2];
-      for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
-        best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
-      vp9_update_mv_count(cpi, x, best_mv);
-    }
 
-    if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
-      const int ctx = vp9_get_pred_context_switchable_interp(xd);
-      ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+    if (is_inter_block(mbmi)) {
+      if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
+        int_mv best_mv[2];
+        for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+          best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+        vp9_update_mv_count(cpi, x, best_mv);
+      }
+
+      if (cm->interp_filter == SWITCHABLE) {
+        const int ctx = vp9_get_pred_context_switchable_interp(xd);
+        ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+      }
     }
 
     cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
     cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
     cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
 
-    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
+    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
       cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
   }
 }
@@ -671,7 +673,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
   }
 
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
-    vp9_activity_masking(cpi, x);
+    activity_masking(cpi, x);
 
   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
     vp9_clear_system_state();  // __asm emms;
@@ -711,36 +713,40 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
 
 static void update_stats(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MODE_INFO *mi = xd->mi_8x8[0];
-  MB_MODE_INFO *const mbmi = &mi->mbmi;
+  const MACROBLOCK *const x = &cpi->mb;
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const MODE_INFO *const mi = xd->mi_8x8[0];
+  const MB_MODE_INFO *const mbmi = &mi->mbmi;
 
   if (!frame_is_intra_only(cm)) {
     const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
                                                      SEG_LVL_REF_FRAME);
+    if (!seg_ref_active) {
+      FRAME_COUNTS *const counts = &cm->counts;
+      const int inter_block = is_inter_block(mbmi);
 
-    if (!seg_ref_active)
-      cm->counts.intra_inter[vp9_get_intra_inter_context(xd)]
-                            [is_inter_block(mbmi)]++;
-
-    // If the segment reference feature is enabled we have only a single
-    // reference frame allowed for the segment so exclude it from
-    // the reference frame counts used to work out probabilities.
-    if (is_inter_block(mbmi) && !seg_ref_active) {
-      if (cm->reference_mode == REFERENCE_MODE_SELECT)
-        cm->counts.comp_inter[vp9_get_reference_mode_context(cm, xd)]
-                             [has_second_ref(mbmi)]++;
-
-      if (has_second_ref(mbmi)) {
-        cm->counts.comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
-                           [mbmi->ref_frame[0] == GOLDEN_FRAME]++;
-      } else {
-        cm->counts.single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
-                             [mbmi->ref_frame[0] != LAST_FRAME]++;
-        if (mbmi->ref_frame[0] != LAST_FRAME)
-          cm->counts.single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
-                               [mbmi->ref_frame[0] != GOLDEN_FRAME]++;
+      counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++;
+
+      // If the segment reference feature is enabled we have only a single
+      // reference frame allowed for the segment so exclude it from
+      // the reference frame counts used to work out probabilities.
+      if (inter_block) {
+        const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
+
+        if (cm->reference_mode == REFERENCE_MODE_SELECT)
+          counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
+                            [has_second_ref(mbmi)]++;
+
+        if (has_second_ref(mbmi)) {
+          counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
+                          [ref0 == GOLDEN_FRAME]++;
+        } else {
+          counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
+                            [ref0 != LAST_FRAME]++;
+          if (ref0 != LAST_FRAME)
+            counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
+                              [ref0 != GOLDEN_FRAME]++;
+        }
       }
     }
   }
@@ -1072,17 +1078,18 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
   } else {
     // Note how often each mode chosen as best
     cpi->mode_chosen_counts[mb_mode_index]++;
-    if (is_inter_block(mbmi) &&
-        (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV)) {
-      int_mv best_mv[2];
-      for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
-        best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
-      vp9_update_mv_count(cpi, x, best_mv);
-    }
+    if (is_inter_block(mbmi)) {
+      if (mbmi->sb_type < BLOCK_8X8 || mbmi->mode == NEWMV) {
+        int_mv best_mv[2];
+        for (i = 0; i < 1 + has_second_ref(mbmi); ++i)
+          best_mv[i].as_int = mbmi->ref_mvs[mbmi->ref_frame[i]][0].as_int;
+        vp9_update_mv_count(cpi, x, best_mv);
+      }
 
-    if (cm->interp_filter == SWITCHABLE && is_inter_mode(mbmi->mode)) {
-      const int ctx = vp9_get_pred_context_switchable_interp(xd);
-      ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+      if (cm->interp_filter == SWITCHABLE) {
+        const int ctx = vp9_get_pred_context_switchable_interp(xd);
+        ++cm->counts.switchable_interp[ctx][mbmi->interp_filter];
+      }
     }
   }
 }
@@ -2181,108 +2188,6 @@ static void switch_tx_mode(VP9_COMP *cpi) {
     cpi->common.tx_mode = ALLOW_32X32;
 }
 
-static void encode_frame_internal(VP9_COMP *cpi) {
-  int mi_row;
-  MACROBLOCK *const x = &cpi->mb;
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-//  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
-//           cpi->common.current_video_frame, cpi->common.show_frame,
-//           cm->frame_type);
-
-  vp9_zero(cm->counts.switchable_interp);
-  vp9_zero(cpi->tx_stepdown_count);
-
-  xd->mi_8x8 = cm->mi_grid_visible;
-  // required for vp9_frame_init_quantizer
-  xd->mi_8x8[0] = cm->mi;
-
-  xd->last_mi = cm->prev_mi;
-
-  vp9_zero(cm->counts.mv);
-  vp9_zero(cpi->coef_counts);
-  vp9_zero(cm->counts.eob_branch);
-
-  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
-      && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
-  switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
-
-  vp9_frame_init_quantizer(cpi);
-
-  vp9_initialize_rd_consts(cpi);
-  vp9_initialize_me_consts(cpi, cm->base_qindex);
-  switch_tx_mode(cpi);
-
-  if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
-    // Initialize encode frame context.
-    init_encode_frame_mb_context(cpi);
-
-    // Build a frame level activity map
-    build_activity_map(cpi);
-  }
-
-  // Re-initialize encode frame context.
-  init_encode_frame_mb_context(cpi);
-
-  vp9_zero(cpi->rd_comp_pred_diff);
-  vp9_zero(cpi->rd_filter_diff);
-  vp9_zero(cpi->rd_tx_select_diff);
-  vp9_zero(cpi->rd_tx_select_threshes);
-
-  set_prev_mi(cm);
-
-  {
-    struct vpx_usec_timer emr_timer;
-    vpx_usec_timer_start(&emr_timer);
-
-    {
-      // Take tiles into account and give start/end MB
-      int tile_col, tile_row;
-      TOKENEXTRA *tp = cpi->tok;
-      const int tile_cols = 1 << cm->log2_tile_cols;
-      const int tile_rows = 1 << cm->log2_tile_rows;
-
-      for (tile_row = 0; tile_row < tile_rows; tile_row++) {
-        for (tile_col = 0; tile_col < tile_cols; tile_col++) {
-          TileInfo tile;
-          TOKENEXTRA *tp_old = tp;
-
-          // For each row of SBs in the frame
-          vp9_tile_init(&tile, cm, tile_row, tile_col);
-          for (mi_row = tile.mi_row_start;
-               mi_row < tile.mi_row_end; mi_row += 8)
-            encode_sb_row(cpi, &tile, mi_row, &tp);
-
-          cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
-          assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
-        }
-      }
-    }
-
-    vpx_usec_timer_mark(&emr_timer);
-    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
-  }
-
-  if (cpi->sf.skip_encode_sb) {
-    int j;
-    unsigned int intra_count = 0, inter_count = 0;
-    for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
-      intra_count += cm->counts.intra_inter[j][0];
-      inter_count += cm->counts.intra_inter[j][1];
-    }
-    cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);
-    cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
-    cpi->sf.skip_encode_frame &= cm->show_frame;
-  } else {
-    cpi->sf.skip_encode_frame = 0;
-  }
-
-#if 0
-  // Keep record of the total distortion this time around for future use
-  cpi->last_frame_distortion = cpi->frame_distortion;
-#endif
-}
 
 static int check_dual_ref_flags(VP9_COMP *cpi) {
   const int ref_flags = cpi->ref_frame_flags;
@@ -2572,28 +2477,18 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                      &dummy_rate, &dummy_dist, 1);
   }
 }
+// end RTC play code
 
-
-static void encode_rtc_frame_internal(VP9_COMP *cpi) {
+static void encode_frame_internal(VP9_COMP *cpi) {
   int mi_row;
-  MACROBLOCK * const x = &cpi->mb;
-  VP9_COMMON * const cm = &cpi->common;
-  MACROBLOCKD * const xd = &x->e_mbd;
+  MACROBLOCK *const x = &cpi->mb;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
 
 //  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
 //           cpi->common.current_video_frame, cpi->common.show_frame,
 //           cm->frame_type);
 
-// debug output
-#if DBG_PRNT_SEGMAP
-  {
-    FILE *statsfile;
-    statsfile = fopen("segmap2.stt", "a");
-    fprintf(statsfile, "\n");
-    fclose(statsfile);
-  }
-#endif
-
   vp9_zero(cm->counts.switchable_interp);
   vp9_zero(cpi->tx_stepdown_count);
 
@@ -2603,7 +2498,7 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
 
   xd->last_mi = cm->prev_mi;
 
-  vp9_zero(cpi->common.counts.mv);
+  vp9_zero(cm->counts.mv);
   vp9_zero(cpi->coef_counts);
   vp9_zero(cm->counts.eob_branch);
 
@@ -2616,7 +2511,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
   vp9_initialize_rd_consts(cpi);
   vp9_initialize_me_consts(cpi, cm->base_qindex);
   switch_tx_mode(cpi);
-  cpi->sf.always_this_block_size = BLOCK_16X16;
 
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
     // Initialize encode frame context.
@@ -2655,9 +2549,12 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
           // For each row of SBs in the frame
           vp9_tile_init(&tile, cm, tile_row, tile_col);
           for (mi_row = tile.mi_row_start;
-               mi_row < tile.mi_row_end; mi_row += 8)
-            encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
-
+               mi_row < tile.mi_row_end; mi_row += 8) {
+            if (cpi->sf.super_fast_rtc)
+              encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
+            else
+              encode_sb_row(cpi, &tile, mi_row, &tp);
+          }
           cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
           assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
         }
@@ -2687,8 +2584,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
   cpi->last_frame_distortion = cpi->frame_distortion;
 #endif
 }
-// end RTC play code
-
 
 void vp9_encode_frame(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
@@ -2763,10 +2658,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     select_tx_mode(cpi);
     cm->reference_mode = reference_mode;
 
-    if (cpi->sf.super_fast_rtc)
-      encode_rtc_frame_internal(cpi);
-    else
-      encode_frame_internal(cpi);
+    encode_frame_internal(cpi);
 
     for (i = 0; i < REFERENCE_MODES; ++i) {
       const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
@@ -2846,10 +2738,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
   } else {
     // Force the usage of the BILINEAR interp_filter.
     cm->interp_filter = BILINEAR;
-    if (cpi->sf.super_fast_rtc)
-      encode_rtc_frame_internal(cpi);
-    else
-      encode_frame_internal(cpi);
+    encode_frame_internal(cpi);
   }
 }
 
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 376a899e0..c7507c13b 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -19,7 +19,6 @@
 #include "vp9/common/vp9_reconintra.h"
 #include "vp9/common/vp9_systemdependent.h"
 
-#include "vp9/encoder/vp9_dct.h"
 #include "vp9/encoder/vp9_encodemb.h"
 #include "vp9/encoder/vp9_quantize.h"
 #include "vp9/encoder/vp9_rdopt.h"
@@ -571,7 +570,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
       if (!x->skip_recode) {
         vp9_subtract_block(16, 16, src_diff, diff_stride,
                            src, p->src.stride, dst, pd->dst.stride);
-        vp9_fht16x16(tx_type, src_diff, coeff, diff_stride);
+        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
         vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                        p->quant, p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, p->zbin_extra, eob, scan_order->scan,
@@ -591,7 +590,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
       if (!x->skip_recode) {
         vp9_subtract_block(8, 8, src_diff, diff_stride,
                            src, p->src.stride, dst, pd->dst.stride);
-        vp9_fht8x8(tx_type, src_diff, coeff, diff_stride);
+        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
         vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                        p->quant_shift, qcoeff, dqcoeff,
                        pd->dequant, p->zbin_extra, eob, scan_order->scan,
@@ -617,7 +616,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
         vp9_subtract_block(4, 4, src_diff, diff_stride,
                            src, p->src.stride, dst, pd->dst.stride);
         if (tx_type != DCT_DCT)
-          vp9_short_fht4x4(src_diff, coeff, diff_stride, tx_type);
+          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
         else
           x->fwd_txm4x4(src_diff, coeff, diff_stride);
         vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 838f74e8c..dc35044d6 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -902,8 +902,8 @@ static double calc_correction_factor(double err_per_mb,
   return fclamp(pow(error_term, power_term), 0.05, 5.0);
 }
 
-static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
-                          int section_target_bandwitdh) {
+int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+                              int section_target_bandwitdh) {
   int q;
   const int num_mbs = cpi->common.MBs;
   int target_norm_bits_per_mb;
@@ -2280,8 +2280,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     // Special case code for first frame.
     const int section_target_bandwidth = (int)(twopass->bits_left /
                                                frames_left);
-    const int tmp_q = estimate_max_q(cpi, &twopass->total_left_stats,
-                                     section_target_bandwidth);
+    const int tmp_q = vp9_twopass_worst_quality(cpi, &twopass->total_left_stats,
+                                                section_target_bandwidth);
 
     rc->active_worst_quality = tmp_q;
     rc->ni_av_qi = tmp_q;
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 19b59815a..7e612183e 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -10,20 +10,86 @@
 
 #ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
 #define VP9_ENCODER_VP9_FIRSTPASS_H_
-#include "vp9/encoder/vp9_onyx_int.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-void vp9_init_first_pass(VP9_COMP *cpi);
-void vp9_rc_get_first_pass_params(VP9_COMP *cpi);
-void vp9_first_pass(VP9_COMP *cpi);
-void vp9_end_first_pass(VP9_COMP *cpi);
+typedef struct {
+  double frame;
+  double intra_error;
+  double coded_error;
+  double sr_coded_error;
+  double ssim_weighted_pred_err;
+  double pcnt_inter;
+  double pcnt_motion;
+  double pcnt_second_ref;
+  double pcnt_neutral;
+  double MVr;
+  double mvr_abs;
+  double MVc;
+  double mvc_abs;
+  double MVrv;
+  double MVcv;
+  double mv_in_out_count;
+  double new_mv_count;
+  double duration;
+  double count;
+} FIRSTPASS_STATS;
 
-void vp9_init_second_pass(VP9_COMP *cpi);
-void vp9_rc_get_second_pass_params(VP9_COMP *cpi);
-void vp9_end_second_pass(VP9_COMP *cpi);
+struct twopass_rc {
+  unsigned int section_intra_rating;
+  unsigned int next_iiratio;
+  unsigned int this_iiratio;
+  FIRSTPASS_STATS total_stats;
+  FIRSTPASS_STATS this_frame_stats;
+  FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
+  FIRSTPASS_STATS total_left_stats;
+  int first_pass_done;
+  int64_t bits_left;
+  int64_t clip_bits_total;
+  double avg_iiratio;
+  double modified_error_min;
+  double modified_error_max;
+  double modified_error_total;
+  double modified_error_left;
+  double kf_intra_err_min;
+  double gf_intra_err_min;
+  int static_scene_max_gf_interval;
+  int kf_bits;
+  // Remaining error from uncoded frames in a gf group. Two pass use only
+  int64_t gf_group_error_left;
+
+  // Projected total bits available for a key frame group of frames
+  int64_t kf_group_bits;
+
+  // Error score of frames still to be coded in kf group
+  int64_t kf_group_error_left;
+
+  // Projected Bits available for a group of frames including 1 GF or ARF
+  int64_t gf_group_bits;
+  // Bits for the golden frame or ARF - 2 pass only
+  int gf_bits;
+  int alt_extra_bits;
+
+  int sr_update_lag;
+
+  int kf_zeromotion_pct;
+  int gf_zeromotion_pct;
+};
+
+struct VP9_COMP;
+
+void vp9_init_first_pass(struct VP9_COMP *cpi);
+void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi);
+void vp9_first_pass(struct VP9_COMP *cpi);
+void vp9_end_first_pass(struct VP9_COMP *cpi);
+
+void vp9_init_second_pass(struct VP9_COMP *cpi);
+void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
+void vp9_end_second_pass(struct VP9_COMP *cpi);
+int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+                              int section_target_bandwitdh);
 
 // Post encode update of the rate control parameters for 2-pass
 void vp9_twopass_postencode_update(struct VP9_COMP *cpi,
diff --git a/vp9/encoder/vp9_lookahead.c b/vp9/encoder/vp9_lookahead.c
index e6e59c05a..4b642e2b6 100644
--- a/vp9/encoder/vp9_lookahead.c
+++ b/vp9/encoder/vp9_lookahead.c
@@ -11,9 +11,12 @@
 #include <stdlib.h>
 
 #include "./vpx_config.h"
+
 #include "vp9/common/vp9_common.h"
+
 #include "vp9/encoder/vp9_extend.h"
 #include "vp9/encoder/vp9_lookahead.h"
+#include "vp9/encoder/vp9_onyx_int.h"
 
 struct lookahead_ctx {
   unsigned int max_sz;         /* Absolute size of the queue */
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 36591bd42..198e11cc2 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -475,11 +475,9 @@ static INLINE int check_bounds(const MACROBLOCK *x, int row, int col,
          ((col + range) <= x->mv_col_max);
 }
 
-static INLINE int check_point(const MACROBLOCK *x, const MV *mv) {
-  return (mv->col < x->mv_col_min) |
-         (mv->col > x->mv_col_max) |
-         (mv->row < x->mv_row_min) |
-         (mv->row > x->mv_row_max);
+static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
+  return (mv->col >= x->mv_col_min) && (mv->col <= x->mv_col_max) &&
+         (mv->row >= x->mv_row_min) && (mv->row <= x->mv_row_max);
 }
 
 #define CHECK_BETTER \
@@ -572,7 +570,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
         for (i = 0; i < num_candidates[t]; i++) {
           this_mv.row = br + candidates[t][i].row;
           this_mv.col = bc + candidates[t][i].col;
-          if (check_point(x, &this_mv))
+          if (!is_mv_in(x, &this_mv))
             continue;
           this_offset = base_offset + (this_mv.row * in_what_stride) +
                                        this_mv.col;
@@ -616,7 +614,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
           for (i = 0; i < num_candidates[s]; i++) {
             this_mv.row = br + candidates[s][i].row;
             this_mv.col = bc + candidates[s][i].col;
-            if (check_point(x, &this_mv))
+            if (!is_mv_in(x, &this_mv))
               continue;
             this_offset = base_offset + (this_mv.row * in_what_stride) +
                                          this_mv.col;
@@ -656,7 +654,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
           for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
             this_mv.row = br + candidates[s][next_chkpts_indices[i]].row;
             this_mv.col = bc + candidates[s][next_chkpts_indices[i]].col;
-            if (check_point(x, &this_mv))
+            if (!is_mv_in(x, &this_mv))
               continue;
             this_offset = base_offset + (this_mv.row * (in_what_stride)) +
                                          this_mv.col;
@@ -695,7 +693,7 @@ static int vp9_pattern_search(const MACROBLOCK *x,
         for (i = 0; i < 4; i++) {
           this_mv.row = br + neighbors[i].row;
           this_mv.col = bc + neighbors[i].col;
-          if (check_point(x, &this_mv))
+          if (!is_mv_in(x, &this_mv))
             continue;
           this_offset = base_offset + this_mv.row * in_what_stride +
                             this_mv.col;
@@ -1685,10 +1683,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
       this_mv.row = ref_mv->row + neighbors[j].row;
       this_mv.col = ref_mv->col + neighbors[j].col;
 
-      if ((this_mv.col > x->mv_col_min) &&
-          (this_mv.col < x->mv_col_max) &&
-          (this_mv.row > x->mv_row_min) &&
-          (this_mv.row < x->mv_row_max)) {
+      if (is_mv_in(x, &this_mv)) {
         const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
                                                 this_mv.col];
         thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride,
@@ -1875,10 +1870,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
       this_mv.row = ref_mv->row + neighbors[j].row;
       this_mv.col = ref_mv->col + neighbors[j].col;
 
-      if ((this_mv.col > x->mv_col_min) &&
-          (this_mv.col < x->mv_col_max) &&
-          (this_mv.row > x->mv_row_min) &&
-          (this_mv.row < x->mv_row_max)) {
+      if (is_mv_in(x, &this_mv)) {
         const uint8_t *check_here = &in_what[this_mv.row * in_what_stride +
                                                 this_mv.col];
 
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 2f1973793..58e58530d 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -572,7 +572,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
                                    int speed) {
   int i;
   sf->adaptive_rd_thresh = 1;
-  sf->recode_loop = (speed < 1);
+  sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
   if (speed == 1) {
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check  = 1;
@@ -590,7 +590,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
     sf->adaptive_pred_interp_filter = 1;
     sf->auto_mv_step_size = 1;
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = 2;
+    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
@@ -626,7 +626,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
     sf->last_partitioning_redo_frequency = 3;
 
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = 2;
+    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->use_lp32x32fdct = 1;
     sf->mode_skip_start = 11;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
@@ -743,7 +743,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
                                  int speed) {
   sf->static_segmentation = 0;
   sf->adaptive_rd_thresh = 1;
-  sf->recode_loop = (speed < 1);
+  sf->recode_loop = ((speed < 1) ? ALLOW_RECODE : ALLOW_RECODE_KFMAXBW);
   if (speed == 1) {
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check = 1;
@@ -761,7 +761,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
     sf->adaptive_pred_interp_filter = 1;
     sf->auto_mv_step_size = 1;
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = 2;
+    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
@@ -797,7 +797,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
     sf->last_partitioning_redo_frequency = 3;
 
     sf->adaptive_rd_thresh = 2;
-    sf->recode_loop = 2;
+    sf->recode_loop = ALLOW_RECODE_KFARFGF;
     sf->use_lp32x32fdct = 1;
     sf->mode_skip_start = 11;
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
@@ -846,6 +846,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
   }
   if (speed >= 6) {
     sf->super_fast_rtc = 1;
+    sf->always_this_block_size = BLOCK_16X16;
   }
 }
 
@@ -865,7 +866,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   // best quality defaults
   sf->RD = 1;
   sf->search_method = NSTEP;
-  sf->recode_loop = 1;
+  sf->recode_loop = ALLOW_RECODE;
   sf->subpel_search_method = SUBPEL_TREE;
   sf->subpel_iters_per_step = 2;
   sf->subpel_force_stop = 0;
@@ -933,7 +934,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
 
   // No recode for 1 pass.
   if (cpi->pass == 0) {
-    sf->recode_loop = 0;
+    sf->recode_loop = DISALLOW_RECODE;
     sf->optimize_coefficients = 0;
   }
 
@@ -1144,6 +1145,123 @@ static int64_t rescale(int val, int64_t num, int denom) {
   return (llval * llnum / llden);
 }
 
+// Initialize layer context data from init_config().
+static void init_layer_context(VP9_COMP *const cpi) {
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
+  int temporal_layer = 0;
+  cpi->svc.spatial_layer_id = 0;
+  cpi->svc.temporal_layer_id = 0;
+  for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
+      ++temporal_layer) {
+    LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+    RATE_CONTROL *const lrc = &lc->rc;
+    lrc->active_worst_quality = q_trans[oxcf->worst_allowed_q];
+    lrc->avg_frame_qindex[INTER_FRAME] = lrc->active_worst_quality;
+    lrc->last_q[INTER_FRAME] = lrc->active_worst_quality;
+    lrc->ni_av_qi = lrc->active_worst_quality;
+    lrc->total_actual_bits = 0;
+    lrc->total_target_vs_actual = 0;
+    lrc->ni_tot_qi = 0;
+    lrc->tot_q = 0.0;
+    lrc->avg_q = 0.0;
+    lrc->ni_frames = 0;
+    lrc->decimation_count = 0;
+    lrc->decimation_factor = 0;
+    lrc->rate_correction_factor = 1.0;
+    lrc->key_frame_rate_correction_factor = 1.0;
+    lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] *
+        1000;
+    lrc->buffer_level = rescale((int)(oxcf->starting_buffer_level),
+                               lc->target_bandwidth, 1000);
+    lrc->bits_off_target = lrc->buffer_level;
+  }
+}
+
+// Update the layer context from a change_config() call.
+static void update_layer_context_change_config(VP9_COMP *const cpi,
+                                               const int target_bandwidth) {
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
+  const RATE_CONTROL *const rc = &cpi->rc;
+  int temporal_layer = 0;
+  float bitrate_alloc = 1.0;
+  for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers;
+      ++temporal_layer) {
+    LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+    RATE_CONTROL *const lrc = &lc->rc;
+    lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000;
+    bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth;
+    // Update buffer-related quantities.
+    lc->starting_buffer_level = oxcf->starting_buffer_level * bitrate_alloc;
+    lc->optimal_buffer_level = oxcf->optimal_buffer_level * bitrate_alloc;
+    lc->maximum_buffer_size = oxcf->maximum_buffer_size * bitrate_alloc;
+    lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
+    lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size);
+    // Update framerate-related quantities.
+    lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+    lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+    lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
+    // Update qp-related quantities.
+    lrc->worst_quality = rc->worst_quality;
+    lrc->best_quality = rc->best_quality;
+    lrc->active_worst_quality = rc->active_worst_quality;
+  }
+}
+
+// Prior to encoding the frame, update framerate-related quantities
+// for the current layer.
+static void update_layer_framerate(VP9_COMP *const cpi) {
+  int temporal_layer = cpi->svc.temporal_layer_id;
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
+  LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
+  RATE_CONTROL *const lrc = &lc->rc;
+  lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+  lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+  lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
+  // Update the average layer frame size (non-cumulative per-frame-bw).
+  if (temporal_layer == 0) {
+    lc->avg_frame_size = lrc->av_per_frame_bandwidth;
+  } else {
+    double prev_layer_framerate = oxcf->framerate /
+        oxcf->ts_rate_decimator[temporal_layer - 1];
+    int prev_layer_target_bandwidth =
+        oxcf->ts_target_bitrate[temporal_layer - 1] * 1000;
+    lc->avg_frame_size =
+        (int)(lc->target_bandwidth - prev_layer_target_bandwidth) /
+        (lc->framerate - prev_layer_framerate);
+  }
+}
+
+// Prior to encoding the frame, set the layer context, for the current layer
+// to be encoded, to the cpi struct.
+static void restore_layer_context(VP9_COMP *const cpi) {
+  int temporal_layer = cpi->svc.temporal_layer_id;
+  LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+  int frame_since_key = cpi->rc.frames_since_key;
+  int frame_to_key = cpi->rc.frames_to_key;
+  cpi->rc = lc->rc;
+  cpi->oxcf.target_bandwidth = lc->target_bandwidth;
+  cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
+  cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
+  cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size;
+  cpi->output_framerate = lc->framerate;
+  // Reset the frames_since_key and frames_to_key counters to their values
+  // before the layer restore. Keep these defined for the stream (not layer).
+  cpi->rc.frames_since_key = frame_since_key;
+  cpi->rc.frames_to_key = frame_to_key;
+}
+
+// Save the layer context after encoding the frame.
+static void save_layer_context(VP9_COMP *const cpi) {
+  int temporal_layer = cpi->svc.temporal_layer_id;
+  LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+  lc->rc = cpi->rc;
+  lc->target_bandwidth = cpi->oxcf.target_bandwidth;
+  lc->starting_buffer_level = cpi->oxcf.starting_buffer_level;
+  lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level;
+  lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size;
+  lc->framerate = cpi->output_framerate;
+}
+
 static void set_tile_limits(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
 
@@ -1170,6 +1288,16 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   cm->subsampling_y = 0;
   vp9_alloc_compressor_data(cpi);
 
+  // Spatial scalability.
+  cpi->svc.number_spatial_layers = oxcf->ss_number_layers;
+  // Temporal scalability.
+  cpi->svc.number_temporal_layers = oxcf->ts_number_layers;
+
+  if (cpi->svc.number_temporal_layers > 1 &&
+      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    init_layer_context(cpi);
+  }
+
   // change includes all joint functionality
   vp9_change_config(ptr, oxcf);
 
@@ -1210,9 +1338,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   cpi->gld_fb_idx = 1;
   cpi->alt_fb_idx = 2;
 
-  cpi->current_layer = 0;
-  cpi->use_svc = 0;
-
   set_tile_limits(cpi);
 
   cpi->fixed_divide[0] = 0;
@@ -1220,7 +1345,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
     cpi->fixed_divide[i] = 0x80000 / i;
 }
 
-
 void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   VP9_COMP *cpi = (VP9_COMP *)(ptr);
   VP9_COMMON *const cm = &cpi->common;
@@ -1312,10 +1436,10 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
                                             cpi->oxcf.target_bandwidth, 1000);
   // Under a configuration change, where maximum_buffer_size may change,
   // keep buffer level clipped to the maximum allowed buffer size.
-  if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size) {
-    cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size;
-    cpi->rc.buffer_level = cpi->rc.bits_off_target;
-  }
+  cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target,
+                                cpi->oxcf.maximum_buffer_size);
+  cpi->rc.buffer_level = MIN(cpi->rc.buffer_level,
+                             cpi->oxcf.maximum_buffer_size);
 
   // Set up frame rate and related parameters rate control values.
   vp9_new_framerate(cpi, cpi->oxcf.framerate);
@@ -1350,6 +1474,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   }
   update_frame_size(cpi);
 
+  if (cpi->svc.number_temporal_layers > 1 &&
+      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    update_layer_context_change_config(cpi, cpi->oxcf.target_bandwidth);
+  }
+
   cpi->speed = cpi->oxcf.cpu_used;
 
   if (cpi->oxcf.lag_in_frames == 0) {
@@ -1573,6 +1702,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
 
   vp9_create_common(cm);
 
+  cpi->use_svc = 0;
+
   init_config((VP9_PTR)cpi, oxcf);
 
   init_pick_mode_context(cpi);
@@ -1588,9 +1719,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
   cpi->alt_is_last  = 0;
   cpi->gold_is_alt  = 0;
 
-  // Spatial scalability
-  cpi->number_spatial_layers = oxcf->ss_number_layers;
-
   // Create the encoder segmentation map and set all entries to 0
   CHECK_MEM_ERROR(cm, cpi->segmentation_map,
                   vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
@@ -1616,11 +1744,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
                                sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
   }
 
-#ifdef ENTROPY_STATS
-  if (cpi->pass != 1)
-    init_context_counters();
-#endif
-
   /*Initialize the feed-forward activity masking.*/
   cpi->activity_avg = 90 << 12;
   cpi->key_frame_frequency = cpi->oxcf.key_freq;
@@ -2431,8 +2554,8 @@ static int recode_loop_test(const VP9_COMP *cpi,
   // Is frame recode allowed.
   // Yes if either recode mode 1 is selected or mode 2 is selected
   // and the frame is a key frame, golden frame or alt_ref_frame
-  } else if ((cpi->sf.recode_loop == 1) ||
-             ((cpi->sf.recode_loop == 2) &&
+  } else if ((cpi->sf.recode_loop == ALLOW_RECODE) ||
+             ((cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF) &&
               (cm->frame_type == KEY_FRAME ||
                cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
     // General over and under shoot tests
@@ -2651,25 +2774,67 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
 }
 #endif
 
+static void encode_without_recode_loop(VP9_COMP *cpi,
+                                       size_t *size,
+                                       uint8_t *dest,
+                                       int q) {
+  VP9_COMMON *const cm = &cpi->common;
+  vp9_clear_system_state();  // __asm emms;
+  vp9_set_quantizer(cpi, q);
+
+  // Set up entropy context depending on frame type. The decoder mandates
+  // the use of the default context, index 0, for keyframes and inter
+  // frames where the error_resilient_mode or intra_only flag is set. For
+  // other inter-frames the encoder currently uses only two contexts;
+  // context 1 for ALTREF frames and context 0 for the others.
+  if (cm->frame_type == KEY_FRAME) {
+    vp9_setup_key_frame(cpi);
+  } else {
+    if (!cm->intra_only && !cm->error_resilient_mode) {
+      cpi->common.frame_context_idx = cpi->refresh_alt_ref_frame;
+    }
+    vp9_setup_inter_frame(cpi);
+  }
+  // Variance adaptive and in frame q adjustment experiments are mutually
+  // exclusive.
+  if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
+    vp9_vaq_frame_setup(cpi);
+  } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
+    setup_in_frame_q_adj(cpi);
+  }
+  // transform / motion compensation build reconstruction frame
+  vp9_encode_frame(cpi);
+
+  // Update the skip mb flag probabilities based on the distribution
+  // seen in the last encoder iteration.
+  // update_base_skip_probs(cpi);
+  vp9_clear_system_state();  // __asm emms;
+}
+
 static void encode_with_recode_loop(VP9_COMP *cpi,
                                     size_t *size,
                                     uint8_t *dest,
-                                    int *q,
+                                    int q,
                                     int bottom_index,
-                                    int top_index,
-                                    int frame_over_shoot_limit,
-                                    int frame_under_shoot_limit) {
+                                    int top_index) {
   VP9_COMMON *const cm = &cpi->common;
   int loop_count = 0;
   int loop = 0;
   int overshoot_seen = 0;
   int undershoot_seen = 0;
   int q_low = bottom_index, q_high = top_index;
+  int frame_over_shoot_limit;
+  int frame_under_shoot_limit;
+
+  // Decide frame size bounds
+  vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
+                                   &frame_under_shoot_limit,
+                                   &frame_over_shoot_limit);
 
   do {
     vp9_clear_system_state();  // __asm emms;
 
-    vp9_set_quantizer(cpi, *q);
+    vp9_set_quantizer(cpi, q);
 
     if (loop_count == 0) {
       // Set up entropy context depending on frame type. The decoder mandates
@@ -2696,7 +2861,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
     }
 
     // transform / motion compensation build reconstruction frame
-
     vp9_encode_frame(cpi);
 
     // Update the skip mb flag probabilities based on the distribution
@@ -2708,7 +2872,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
     // Dummy pack of the bitstream using up to date stats to get an
     // accurate estimate of output frame size to determine if we need
     // to recode.
-    if (cpi->sf.recode_loop != 0) {
+    if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
       vp9_save_coding_context(cpi);
       cpi->dummy_packing = 1;
       if (!cpi->sf.super_fast_rtc)
@@ -2727,7 +2891,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
       if ((cm->frame_type == KEY_FRAME) &&
            cpi->rc.this_key_frame_forced &&
            (cpi->rc.projected_frame_size < cpi->rc.max_frame_bandwidth)) {
-        int last_q = *q;
+        int last_q = q;
         int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
 
         int high_err_target = cpi->ambient_err;
@@ -2743,32 +2907,32 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
             (kf_err > low_err_target &&
              cpi->rc.projected_frame_size <= frame_under_shoot_limit)) {
           // Lower q_high
-          q_high = *q > q_low ? *q - 1 : q_low;
+          q_high = q > q_low ? q - 1 : q_low;
 
           // Adjust Q
-          *q = ((*q) * high_err_target) / kf_err;
-          *q = MIN((*q), (q_high + q_low) >> 1);
+          q = (q * high_err_target) / kf_err;
+          q = MIN(q, (q_high + q_low) >> 1);
         } else if (kf_err < low_err_target &&
                    cpi->rc.projected_frame_size >= frame_under_shoot_limit) {
           // The key frame is much better than the previous frame
           // Raise q_low
-          q_low = *q < q_high ? *q + 1 : q_high;
+          q_low = q < q_high ? q + 1 : q_high;
 
           // Adjust Q
-          *q = ((*q) * low_err_target) / kf_err;
-          *q = MIN((*q), (q_high + q_low + 1) >> 1);
+          q = (q * low_err_target) / kf_err;
+          q = MIN(q, (q_high + q_low + 1) >> 1);
         }
 
         // Clamp Q to upper and lower limits:
-        *q = clamp(*q, q_low, q_high);
+        q = clamp(q, q_low, q_high);
 
-        loop = *q != last_q;
+        loop = q != last_q;
       } else if (recode_loop_test(
           cpi, frame_over_shoot_limit, frame_under_shoot_limit,
-          *q, MAX(q_high, top_index), bottom_index)) {
+          q, MAX(q_high, top_index), bottom_index)) {
         // Is the projected frame size out of range and are we allowed
         // to attempt to recode.
-        int last_q = *q;
+        int last_q = q;
         int retries = 0;
 
         // Frame size out of permitted range:
@@ -2781,23 +2945,23 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
             q_high = cpi->rc.worst_quality;
 
           // Raise Qlow as to at least the current value
-          q_low = *q < q_high ? *q + 1 : q_high;
+          q_low = q < q_high ? q + 1 : q_high;
 
           if (undershoot_seen || loop_count > 1) {
             // Update rate_correction_factor unless
             vp9_rc_update_rate_correction_factors(cpi, 1);
 
-            *q = (q_high + q_low + 1) / 2;
+            q = (q_high + q_low + 1) / 2;
           } else {
             // Update rate_correction_factor unless
             vp9_rc_update_rate_correction_factors(cpi, 0);
 
-            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+            q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                    bottom_index, MAX(q_high, top_index));
 
-            while (*q < q_low && retries < 10) {
+            while (q < q_low && retries < 10) {
               vp9_rc_update_rate_correction_factors(cpi, 0);
-              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+              q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                      bottom_index, MAX(q_high, top_index));
               retries++;
             }
@@ -2806,27 +2970,27 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
           overshoot_seen = 1;
         } else {
           // Frame is too small
-          q_high = *q > q_low ? *q - 1 : q_low;
+          q_high = q > q_low ? q - 1 : q_low;
 
           if (overshoot_seen || loop_count > 1) {
             vp9_rc_update_rate_correction_factors(cpi, 1);
-            *q = (q_high + q_low) / 2;
+            q = (q_high + q_low) / 2;
           } else {
             vp9_rc_update_rate_correction_factors(cpi, 0);
-            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+            q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                    bottom_index, top_index);
             // Special case reset for qlow for constrained quality.
             // This should only trigger where there is very substantial
             // undershoot on a frame and the auto cq level is above
             // the user passsed in value.
             if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
-                *q < q_low) {
-              q_low = *q;
+                q < q_low) {
+              q_low = q;
             }
 
-            while (*q > q_high && retries < 10) {
+            while (q > q_high && retries < 10) {
               vp9_rc_update_rate_correction_factors(cpi, 0);
-              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+              q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                      bottom_index, top_index);
               retries++;
             }
@@ -2836,9 +3000,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
         }
 
         // Clamp Q to upper and lower limits:
-        *q = clamp(*q, q_low, q_high);
+        q = clamp(q, q_low, q_high);
 
-        loop = *q != last_q;
+        loop = q != last_q;
       } else {
         loop = 0;
       }
@@ -2911,20 +3075,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   VP9_COMMON *const cm = &cpi->common;
   TX_SIZE t;
   int q;
-  int frame_over_shoot_limit;
-  int frame_under_shoot_limit;
   int top_index;
   int bottom_index;
 
   SPEED_FEATURES *const sf = &cpi->sf;
-  unsigned int max_mv_def = MIN(cpi->common.width, cpi->common.height);
+  unsigned int max_mv_def = MIN(cm->width, cm->height);
   struct segmentation *const seg = &cm->seg;
 
   set_ext_overrides(cpi);
 
   /* Scale the source buffer, if required. */
-  if (cm->mi_cols * 8 != cpi->un_scaled_source->y_width ||
-      cm->mi_rows * 8 != cpi->un_scaled_source->y_height) {
+  if (cm->mi_cols * MI_SIZE != cpi->un_scaled_source->y_width ||
+      cm->mi_rows * MI_SIZE != cpi->un_scaled_source->y_height) {
     scale_and_extend_frame_nonnormative(cpi->un_scaled_source,
                                         &cpi->scaled_source);
     cpi->Source = &cpi->scaled_source;
@@ -2933,12 +3095,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   }
   scale_references(cpi);
 
-  // Clear down mmx registers to allow floating point in what follows.
   vp9_clear_system_state();
 
-  // Clear zbin over-quant value and mode boost values.
-  cpi->zbin_mode_boost = 0;
-
   // Enable or disable mode based tweaking of the zbin.
   // For 2 pass only used where GF/ARF prediction quality
   // is above a threshold.
@@ -2946,7 +3104,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   cpi->zbin_mode_boost_enabled = 0;
 
   // Current default encoder behavior for the altref sign bias.
-  cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active;
+  cm->ref_frame_sign_bias[ALTREF_FRAME] = cpi->rc.source_alt_ref_active;
 
   // Set default state for segment based loop filter update flags.
   cm->lf.mode_ref_delta_update = 0;
@@ -2955,7 +3113,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   cpi->mv_step_param = vp9_init_search_range(cpi, max_mv_def);
   // Initialize cpi->max_mv_magnitude and cpi->mv_step_param if appropriate.
   if (sf->auto_mv_step_size) {
-    if (frame_is_intra_only(&cpi->common)) {
+    if (frame_is_intra_only(cm)) {
       // Initialize max_mv_magnitude for use in the first INTER frame
       // after a key/intra-only frame.
       cpi->max_mv_magnitude = max_mv_def;
@@ -2964,8 +3122,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
         // Allow mv_steps to correspond to twice the max mv magnitude found
         // in the previous frame, capped by the default max_mv_magnitude based
         // on resolution.
-        cpi->mv_step_param = vp9_init_search_range(
-            cpi, MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+        cpi->mv_step_param = vp9_init_search_range(cpi, MIN(max_mv_def, 2 *
+                                 cpi->max_mv_magnitude));
       cpi->max_mv_magnitude = 0;
     }
   }
@@ -3002,9 +3160,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   // static regions if indicated.
   // Only allowed in second pass of two pass (as requires lagged coding)
   // and if the relevant speed feature flag is set.
-  if ((cpi->pass == 2) && (cpi->sf.static_segmentation)) {
+  if (cpi->pass == 2 && cpi->sf.static_segmentation)
     configure_static_seg_features(cpi);
-  }
 
   // For 1 pass CBR, check if we are dropping this frame.
   // Never drop on key frame.
@@ -3013,7 +3170,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
       cm->frame_type != KEY_FRAME) {
     if (vp9_rc_drop_frame(cpi)) {
       vp9_rc_postencode_update_drop_frame(cpi);
-      cm->current_video_frame++;
+      ++cm->current_video_frame;
       return;
     }
   }
@@ -3051,41 +3208,22 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   vp9_write_yuv_frame(cpi->Source);
 #endif
 
-  // Decide frame size bounds
-  vp9_rc_compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
-                                   &frame_under_shoot_limit,
-                                   &frame_over_shoot_limit);
-
   // Decide q and q bounds.
   q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
                                         &bottom_index,
                                         &top_index);
 
-  // JBB : This is realtime mode.  In real time mode the first frame
-  // should be larger. Q of 0 is disabled because we force tx size to be
-  // 16x16...
-  if (cpi->sf.super_fast_rtc) {
-    if (cpi->common.current_video_frame == 0)
-      q /= 3;
-
-    if (q == 0)
-      q++;
-  }
-
   if (!frame_is_intra_only(cm)) {
     cm->interp_filter = DEFAULT_INTERP_FILTER;
     /* TODO: Decide this more intelligently */
     set_high_precision_mv(cpi, (q < HIGH_PRECISION_MV_QTHRESH));
   }
 
-  encode_with_recode_loop(cpi,
-                          size,
-                          dest,
-                          &q,
-                          bottom_index,
-                          top_index,
-                          frame_over_shoot_limit,
-                          frame_under_shoot_limit);
+  if (cpi->sf.recode_loop == DISALLOW_RECODE) {
+    encode_without_recode_loop(cpi, size, dest, q);
+  } else {
+    encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index);
+  }
 
   // Special case code to reduce pulsing when key frames are forced at a
   // fixed interval. Note the reconstruction error if it is the frame before
@@ -3132,37 +3270,30 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   update_reference_frames(cpi);
 
   for (t = TX_4X4; t <= TX_32X32; t++)
-    full_to_model_counts(cpi->common.counts.coef[t],
-                         cpi->coef_counts[t]);
-  if (!cpi->common.error_resilient_mode &&
-      !cpi->common.frame_parallel_decoding_mode) {
-    vp9_adapt_coef_probs(&cpi->common);
-  }
-
-  if (!frame_is_intra_only(&cpi->common)) {
-    if (!cpi->common.error_resilient_mode &&
-        !cpi->common.frame_parallel_decoding_mode) {
-      vp9_adapt_mode_probs(&cpi->common);
-      vp9_adapt_mv_probs(&cpi->common, cpi->common.allow_high_precision_mv);
+    full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]);
+
+  if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode)
+    vp9_adapt_coef_probs(cm);
+
+  if (!frame_is_intra_only(cm)) {
+    if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
+      vp9_adapt_mode_probs(cm);
+      vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
     }
   }
 
-#ifdef ENTROPY_STATS
-  vp9_update_mode_context_stats(cpi);
-#endif
-
 #if 0
   output_frame_level_debug_stats(cpi);
 #endif
   if (cpi->refresh_golden_frame == 1)
-    cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN;
+    cm->frame_flags |= FRAMEFLAGS_GOLDEN;
   else
-    cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_GOLDEN;
+    cm->frame_flags &= ~FRAMEFLAGS_GOLDEN;
 
   if (cpi->refresh_alt_ref_frame == 1)
-    cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF;
+    cm->frame_flags |= FRAMEFLAGS_ALTREF;
   else
-    cm->frame_flags = cm->frame_flags&~FRAMEFLAGS_ALTREF;
+    cm->frame_flags &= ~FRAMEFLAGS_ALTREF;
 
   get_ref_frame_flags(cpi);
 
@@ -3211,6 +3342,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   // reset to normal state now that we are done.
   if (!cm->show_existing_frame)
     cm->last_show_frame = cm->show_frame;
+
   if (cm->show_frame) {
     // current mip will be the prev_mip for the next frame
     MODE_INFO *temp = cm->prev_mip;
@@ -3231,6 +3363,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     // update not a real frame
     ++cm->current_video_frame;
   }
+
   // restore prev_mi
   cm->prev_mi = cm->prev_mip + cm->mode_info_stride + 1;
   cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1;
@@ -3516,6 +3649,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
     adjust_frame_rate(cpi);
   }
 
+  if (cpi->svc.number_temporal_layers > 1 &&
+      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    update_layer_framerate(cpi);
+    restore_layer_context(cpi);
+  }
+
   // start with a 0 size frame
   *size = 0;
 
@@ -3591,6 +3730,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags,
     cpi->droppable = !frame_is_reference(cpi);
   }
 
+  // Save layer specific state.
+  if (cpi->svc.number_temporal_layers > 1 &&
+      cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    save_layer_context(cpi);
+  }
+
   vpx_usec_timer_mark(&cmptimer);
   cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
 
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index d0ca962db..90ed606f6 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -23,6 +23,7 @@
 #include "vp9/common/vp9_onyxc_int.h"
 
 #include "vp9/encoder/vp9_encodemb.h"
+#include "vp9/encoder/vp9_firstpass.h"
 #include "vp9/encoder/vp9_lookahead.h"
 #include "vp9/encoder/vp9_mbgraph.h"
 #include "vp9/encoder/vp9_mcomp.h"
@@ -44,8 +45,9 @@ extern "C" {
 #else
 #define MIN_GF_INTERVAL             4
 #endif
-#define DEFAULT_GF_INTERVAL         7
+#define DEFAULT_GF_INTERVAL         10
 #define DEFAULT_KF_BOOST            2000
+#define DEFAULT_GF_BOOST            2000
 
 #define KEY_FRAME_CONTEXT 5
 
@@ -78,28 +80,6 @@ typedef struct {
   FRAME_CONTEXT fc;
 } CODING_CONTEXT;
 
-typedef struct {
-  double frame;
-  double intra_error;
-  double coded_error;
-  double sr_coded_error;
-  double ssim_weighted_pred_err;
-  double pcnt_inter;
-  double pcnt_motion;
-  double pcnt_second_ref;
-  double pcnt_neutral;
-  double MVr;
-  double mvr_abs;
-  double MVc;
-  double mvc_abs;
-  double MVrv;
-  double MVcv;
-  double mv_in_out_count;
-  double new_mv_count;
-  double duration;
-  double count;
-} FIRSTPASS_STATS;
-
 // This enumerator type needs to be kept aligned with the mode order in
 // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
 typedef enum {
@@ -217,6 +197,17 @@ typedef enum {
   LAST_FRAME_PARTITION_ALL = 2
 } LAST_FRAME_PARTITION_METHOD;
 
+typedef enum {
+  // No recode.
+  DISALLOW_RECODE = 0,
+  // Allow recode for KF and exceeding maximum frame bandwidth.
+  ALLOW_RECODE_KFMAXBW = 1,
+  // Allow recode only for KF/ARF/GF frames.
+  ALLOW_RECODE_KFARFGF = 2,
+  // Allow recode for all frames based on bitrate constraints.
+  ALLOW_RECODE = 3,
+} RECODE_LOOP_TYPE;
+
 typedef struct {
   // This flag refers to whether or not to perform rd optimization.
   int RD;
@@ -224,11 +215,7 @@ typedef struct {
   // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
   SEARCH_METHODS search_method;
 
-  // Recode_loop can be:
-  // 0 means we only encode a frame once
-  // 1 means we can re-encode based on bitrate constraints on any frame
-  // 2 means we can only recode gold, alt, and key frames.
-  int recode_loop;
+  RECODE_LOOP_TYPE recode_loop;
 
   // Subpel_search_method can only be subpel_tree which does a subpixel
   // logarithmic search that keeps stepping at 1/2 pixel units until
@@ -407,6 +394,16 @@ typedef struct {
   int super_fast_rtc;
 } SPEED_FEATURES;
 
+typedef struct {
+  RATE_CONTROL rc;
+  int target_bandwidth;
+  int64_t starting_buffer_level;
+  int64_t optimal_buffer_level;
+  int64_t maximum_buffer_size;
+  double framerate;
+  int avg_frame_size;
+} LAYER_CONTEXT;
+
 typedef struct VP9_COMP {
   DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]);
   DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]);
@@ -451,9 +448,6 @@ typedef struct VP9_COMP {
   int gld_fb_idx;
   int alt_fb_idx;
 
-  int current_layer;
-  int use_svc;
-
 #if CONFIG_MULTIPLE_ARF
   int alt_ref_fb_idx[REF_FRAMES - 3];
 #endif
@@ -573,46 +567,7 @@ typedef struct VP9_COMP {
   uint64_t time_pick_lpf;
   uint64_t time_encode_sb_row;
 
-  struct twopass_rc {
-    unsigned int section_intra_rating;
-    unsigned int next_iiratio;
-    unsigned int this_iiratio;
-    FIRSTPASS_STATS total_stats;
-    FIRSTPASS_STATS this_frame_stats;
-    FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start;
-    FIRSTPASS_STATS total_left_stats;
-    int first_pass_done;
-    int64_t bits_left;
-    int64_t clip_bits_total;
-    double avg_iiratio;
-    double modified_error_min;
-    double modified_error_max;
-    double modified_error_total;
-    double modified_error_left;
-    double kf_intra_err_min;
-    double gf_intra_err_min;
-    int static_scene_max_gf_interval;
-    int kf_bits;
-    // Remaining error from uncoded frames in a gf group. Two pass use only
-    int64_t gf_group_error_left;
-
-    // Projected total bits available for a key frame group of frames
-    int64_t kf_group_bits;
-
-    // Error score of frames still to be coded in kf group
-    int64_t kf_group_error_left;
-
-    // Projected Bits available for a group of frames including 1 GF or ARF
-    int64_t gf_group_bits;
-    // Bits for the golden frame or ARF - 2 pass only
-    int gf_bits;
-    int alt_extra_bits;
-
-    int sr_update_lag;
-
-    int kf_zeromotion_pct;
-    int gf_zeromotion_pct;
-  } twopass;
+  struct twopass_rc twopass;
 
   YV12_BUFFER_CONFIG alt_ref_buffer;
   YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
@@ -669,7 +624,18 @@ typedef struct VP9_COMP {
   int initial_width;
   int initial_height;
 
-  int number_spatial_layers;
+  int use_svc;
+
+  struct svc {
+    int spatial_layer_id;
+    int temporal_layer_id;
+    int number_spatial_layers;
+    int number_temporal_layers;
+    // Layer context used for rate control in CBR mode, only defined for
+    // temporal layers for now.
+    LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS];
+  } svc;
+
   int enable_encode_breakout;   // Default value is 1. From first pass stats,
                                 // encode_breakout may be disabled.
 
@@ -726,8 +692,6 @@ void vp9_encode_frame(VP9_COMP *cpi);
 
 void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
 
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
-
 void vp9_set_speed_features(VP9_COMP *cpi);
 
 int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index a2eea1cd7..862573f3f 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -79,55 +79,47 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
                             const int16_t *dequant_ptr,
                             int zbin_oq_value, uint16_t *eob_ptr,
                             const int16_t *scan, const int16_t *iscan) {
-  int i, rc, eob;
-  int zbins[2], nzbins[2];
-  int x, y, z, sz;
+  const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1),
+                         ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) };
+  const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1};
+
   int idx = 0;
   int idx_arr[1024];
+  int i, eob = -1;
 
-  vpx_memset(qcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
-  vpx_memset(dqcoeff_ptr, 0, n_coeffs*sizeof(int16_t));
-
-  eob = -1;
-
-  // Base ZBIN
-  zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1);
-  zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1);
-  nzbins[0] = zbins[0] * -1;
-  nzbins[1] = zbins[1] * -1;
+  vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
+  vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(int16_t));
 
   if (!skip_block) {
     // Pre-scan pass
     for (i = 0; i < n_coeffs; i++) {
-      rc = scan[i];
-      z = coeff_ptr[rc];
+      const int rc = scan[i];
+      const int coeff = coeff_ptr[rc];
 
       // If the coefficient is out of the base ZBIN range, keep it for
       // quantization.
-      if (z >= zbins[rc != 0] || z <= nzbins[rc != 0])
+      if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0])
         idx_arr[idx++] = i;
     }
 
     // Quantization pass: only process the coefficients selected in
     // pre-scan pass. Note: idx can be zero.
     for (i = 0; i < idx; i++) {
-      rc = scan[idx_arr[i]];
-
-      z = coeff_ptr[rc];
-      sz = (z >> 31);                               // sign of z
-      x  = (z ^ sz) - sz;                           // x = abs(z)
-
-      x += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
-      x  = clamp(x, INT16_MIN, INT16_MAX);
-      y  = ((((x * quant_ptr[rc != 0]) >> 16) + x) *
-            quant_shift_ptr[rc != 0]) >> 15;      // quantize (x)
-
-      x  = (y ^ sz) - sz;                         // get the sign back
-      qcoeff_ptr[rc]  = x;                        // write to destination
-      dqcoeff_ptr[rc] = x * dequant_ptr[rc != 0] / 2;  // dequantized value
-
-      if (y)
-        eob = idx_arr[i];                         // last nonzero coeffs
+      const int rc = scan[idx_arr[i]];
+      const int coeff = coeff_ptr[rc];
+      const int coeff_sign = (coeff >> 31);
+      int tmp;
+      int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+      abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1);
+      abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX);
+      tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) *
+               quant_shift_ptr[rc != 0]) >> 15;
+
+      qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
+      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
+
+      if (tmp)
+        eob = idx_arr[i];
     }
   }
   *eob_ptr = eob + 1;
@@ -136,8 +128,8 @@ void vp9_quantize_b_32x32_c(const int16_t *coeff_ptr, intptr_t n_coeffs,
 void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block,
                                 const int16_t *scan, const int16_t *iscan) {
   MACROBLOCKD *const xd = &x->e_mbd;
-  struct macroblock_plane* p = &x->plane[plane];
-  struct macroblockd_plane* pd = &xd->plane[plane];
+  struct macroblock_plane *p = &x->plane[plane];
+  struct macroblockd_plane *pd = &xd->plane[plane];
 
   vp9_quantize_b(BLOCK_OFFSET(p->coeff, block),
            16, x->skip_block,
@@ -223,38 +215,30 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
 }
 
 void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
-  int i;
-  VP9_COMMON *const cm = &cpi->common;
+  const VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
-  int zbin_extra;
-  int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
-  const int qindex = vp9_get_qindex(&cpi->common.seg, segment_id,
-                                    cpi->common.base_qindex);
-
-  int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
+  const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
+  const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
+  const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q);
+  const int zbin = cpi->zbin_mode_boost + x->act_zbin_adj;
+  int i;
 
   // Y
-  zbin_extra = (cpi->common.y_dequant[qindex][1] *
-                 (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7;
-
   x->plane[0].quant = cpi->y_quant[qindex];
   x->plane[0].quant_shift = cpi->y_quant_shift[qindex];
   x->plane[0].zbin = cpi->y_zbin[qindex];
   x->plane[0].round = cpi->y_round[qindex];
-  x->plane[0].zbin_extra = (int16_t)zbin_extra;
-  x->e_mbd.plane[0].dequant = cpi->common.y_dequant[qindex];
+  x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7);
+  xd->plane[0].dequant = cm->y_dequant[qindex];
 
   // UV
-  zbin_extra = (cpi->common.uv_dequant[qindex][1] *
-                (cpi->zbin_mode_boost + x->act_zbin_adj)) >> 7;
-
   for (i = 1; i < 3; i++) {
     x->plane[i].quant = cpi->uv_quant[qindex];
     x->plane[i].quant_shift = cpi->uv_quant_shift[qindex];
     x->plane[i].zbin = cpi->uv_zbin[qindex];
     x->plane[i].round = cpi->uv_round[qindex];
-    x->plane[i].zbin_extra = (int16_t)zbin_extra;
-    x->e_mbd.plane[i].dequant = cpi->common.uv_dequant[qindex];
+    x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7);
+    xd->plane[i].dequant = cm->uv_dequant[qindex];
   }
 
 #if CONFIG_ALPHA
@@ -263,18 +247,14 @@ void vp9_mb_init_quantizer(VP9_COMP *cpi, MACROBLOCK *x) {
   x->plane[3].zbin = cpi->a_zbin[qindex];
   x->plane[3].round = cpi->a_round[qindex];
   x->plane[3].zbin_extra = (int16_t)zbin_extra;
-  x->e_mbd.plane[3].dequant = cpi->common.a_dequant[qindex];
+  xd->plane[3].dequant = cm->a_dequant[qindex];
 #endif
 
-  x->skip_block = vp9_segfeature_active(&cpi->common.seg, segment_id,
-                                        SEG_LVL_SKIP);
-
-  /* save this macroblock QIndex for vp9_update_zbin_extra() */
+  x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
   x->q_index = qindex;
 
-  /* R/D setup */
-  cpi->mb.errorperbit = rdmult >> 6;
-  cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
+  x->errorperbit = rdmult >> 6;
+  x->errorperbit += (x->errorperbit == 0);
 
   vp9_initialize_me_consts(cpi, x->q_index);
 }
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index abbf39b81..b3e9f4538 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -215,12 +215,12 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
                                    rc->av_per_frame_bandwidth >> 5);
   if (target < min_frame_target)
     target = min_frame_target;
-  if (cpi->refresh_golden_frame && rc->source_alt_ref_active) {
+  if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) {
     // If there is an active ARF at this location use the minimum
     // bits on this frame even if it is a constructed arf.
     // The active maximum quantizer insures that an appropriate
     // number of bits will be spent if needed for constructed ARFs.
-    target = 0;
+    target = min_frame_target;
   }
   // Clip the frame target to the maximum allowed value.
   if (target > rc->max_frame_bandwidth)
@@ -241,6 +241,26 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) {
   return target;
 }
 
+
+// Update the buffer level for higher layers, given the encoded current layer.
+static void update_layer_buffer_level(VP9_COMP *const cpi,
+                                      int encoded_frame_size) {
+  int temporal_layer = 0;
+  int current_temporal_layer = cpi->svc.temporal_layer_id;
+  for (temporal_layer = current_temporal_layer + 1;
+      temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) {
+    LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer];
+    RATE_CONTROL *lrc = &lc->rc;
+    int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
+        encoded_frame_size);
+    lrc->bits_off_target += bits_off_for_this_layer;
+
+    // Clip buffer level to maximum buffer size for the layer.
+    lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size);
+    lrc->buffer_level = lrc->bits_off_target;
+  }
+}
+
 // Update the buffer level: leaky bucket model.
 static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
   const VP9_COMMON *const cm = &cpi->common;
@@ -255,14 +275,18 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
   }
 
   // Clip the buffer level to the maximum specified buffer size.
-  rc->buffer_level = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
+  rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size);
+  rc->buffer_level = rc->bits_off_target;
+
+  if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    update_layer_buffer_level(cpi, encoded_frame_size);
+  }
 }
 
 int vp9_rc_drop_frame(VP9_COMP *cpi) {
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   RATE_CONTROL *const rc = &cpi->rc;
 
-
   if (!oxcf->drop_frames_water_mark) {
     return 0;
   } else {
@@ -273,7 +297,7 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
       // If buffer is below drop_mark, for now just drop every other frame
       // (starting with the next frame) until it increases back over drop_mark.
       int drop_mark = (int)(oxcf->drop_frames_water_mark *
-                                oxcf->optimal_buffer_level / 100);
+          oxcf->optimal_buffer_level / 100);
       if ((rc->buffer_level > drop_mark) &&
           (rc->decimation_factor > 0)) {
         --rc->decimation_factor;
@@ -301,7 +325,8 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) {
   if (cpi->common.frame_type == KEY_FRAME) {
     return cpi->rc.key_frame_rate_correction_factor;
   } else {
-    if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
+    if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+        !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
       return cpi->rc.gf_rate_correction_factor;
     else
       return cpi->rc.rate_correction_factor;
@@ -312,7 +337,8 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
   if (cpi->common.frame_type == KEY_FRAME) {
     cpi->rc.key_frame_rate_correction_factor = factor;
   } else {
-    if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)
+    if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) &&
+        !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER))
       cpi->rc.gf_rate_correction_factor = factor;
     else
       cpi->rc.rate_correction_factor = factor;
@@ -461,8 +487,7 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
       double q_adj_factor = 1.0;
       double q_val;
 
-      // Baseline value derived from cpi->active_worst_quality and kf boost
-      active_best_quality = get_active_quality(active_worst_quality,
+      active_best_quality = get_active_quality(rc->avg_frame_qindex[KEY_FRAME],
                                                rc->kf_boost,
                                                kf_low, kf_high,
                                                kf_low_motion_minq,
@@ -495,7 +520,8 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
         rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
       q = rc->avg_frame_qindex[INTER_FRAME];
     } else {
-      q = active_worst_quality;
+      q = (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ?
+          active_worst_quality : rc->avg_frame_qindex[KEY_FRAME];
     }
     // For constrained quality dont allow Q less than the cq level
     if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
@@ -538,7 +564,25 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
     if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
       active_best_quality = cpi->cq_target_quality;
     } else {
-      active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+      // Use the lower of active_worst_quality and recent/average Q.
+      if (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+        if (cm->current_video_frame > 1) {
+          if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
+            active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+          else
+            active_best_quality = inter_minq[active_worst_quality];
+        } else {
+          if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality)
+            active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+          else
+            active_best_quality = inter_minq[active_worst_quality];
+        }
+      } else {
+        if (cm->current_video_frame > 1)
+          active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+        else
+          active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+      }
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
       if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
@@ -574,7 +618,6 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
     *top_index = (active_worst_quality + active_best_quality) / 2;
   }
 #endif
-
   if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
     q = active_best_quality;
   // Special case code to try and match quality with forced key frames
@@ -810,12 +853,28 @@ static int rc_pick_q_and_adjust_q_bounds_two_pass(const VP9_COMP *cpi,
 int vp9_rc_pick_q_and_adjust_q_bounds(const VP9_COMP *cpi,
                                       int *bottom_index,
                                       int *top_index) {
+  int q;
   if (cpi->pass == 0)
-    return rc_pick_q_and_adjust_q_bounds_one_pass(
+    q = rc_pick_q_and_adjust_q_bounds_one_pass(
         cpi, bottom_index, top_index);
   else
-    return rc_pick_q_and_adjust_q_bounds_two_pass(
+    q = rc_pick_q_and_adjust_q_bounds_two_pass(
         cpi, bottom_index, top_index);
+
+  // JBB : This is realtime mode.  In real time mode the first frame
+  // should be larger. Q of 0 is disabled because we force tx size to be
+  // 16x16...
+  if (cpi->sf.super_fast_rtc) {
+    if (cpi->common.current_video_frame == 0)
+      q /= 3;
+    if (q == 0)
+      q++;
+    if (q < *bottom_index)
+      *bottom_index = q;
+    else if (q > *top_index)
+      *top_index = q;
+  }
+  return q;
 }
 
 void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi,
@@ -918,7 +977,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
   rc->projected_frame_size = (bytes_used << 3);
 
   // Post encode loop adjustment of Q prediction.
-  vp9_rc_update_rate_correction_factors(cpi, (cpi->sf.recode_loop ||
+  vp9_rc_update_rate_correction_factors(
+      cpi, (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF ||
             cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
 
   // Keep a record of last Q and ambient average Q.
@@ -927,7 +987,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
     rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(
         3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2);
   } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+      (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
+      !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) {
     rc->last_q[2] = cm->base_qindex;
     rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(
         3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2);
@@ -1002,21 +1063,6 @@ void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
   cpi->rc.frames_to_key--;
 }
 
-void vp9_rc_get_svc_params(VP9_COMP *cpi) {
-  VP9_COMMON *const cm = &cpi->common;
-  if ((cm->current_video_frame == 0) ||
-      (cm->frame_flags & FRAMEFLAGS_KEY) ||
-      (cpi->oxcf.auto_key && (cpi->rc.frames_since_key %
-                              cpi->key_frame_frequency == 0))) {
-    cm->frame_type = KEY_FRAME;
-    cpi->rc.source_alt_ref_active = 0;
-  } else {
-    cm->frame_type = INTER_FRAME;
-  }
-  cpi->rc.frames_till_gf_update_due = INT_MAX;
-  cpi->rc.baseline_gf_interval = INT_MAX;
-}
-
 static int test_for_kf_one_pass(VP9_COMP *cpi) {
   // Placeholder function for auto key frame
   return 0;
@@ -1025,62 +1071,88 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) {
 #define USE_ALTREF_FOR_ONE_PASS   1
 
 static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+  static const int af_ratio = 10;
   const RATE_CONTROL *rc = &cpi->rc;
-  int target = rc->av_per_frame_bandwidth;
-  target = vp9_rc_clamp_pframe_target_size(cpi, target);
-  return target;
+  int target;
+#if USE_ALTREF_FOR_ONE_PASS
+  target = (!rc->is_src_frame_alt_ref &&
+            (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ?
+      (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval * af_ratio) /
+      (cpi->rc.baseline_gf_interval + af_ratio - 1) :
+      (rc->av_per_frame_bandwidth * cpi->rc.baseline_gf_interval) /
+      (cpi->rc.baseline_gf_interval + af_ratio - 1);
+#else
+  target = rc->av_per_frame_bandwidth;
+#endif
+  return vp9_rc_clamp_pframe_target_size(cpi, target);
 }
 
 static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+  static const int kf_ratio = 25;
   const RATE_CONTROL *rc = &cpi->rc;
-  int target = rc->av_per_frame_bandwidth * 8;
-  target = vp9_rc_clamp_iframe_target_size(cpi, target);
-  return target;
+  int target = rc->av_per_frame_bandwidth * kf_ratio;
+  return vp9_rc_clamp_iframe_target_size(cpi, target);
+}
+
+static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) {
+  int active_worst_quality;
+  if (cpi->common.frame_type == KEY_FRAME) {
+    if (cpi->common.current_video_frame == 0) {
+      active_worst_quality = cpi->rc.worst_quality;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+    }
+  } else if (!cpi->rc.is_src_frame_alt_ref &&
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+    if (cpi->common.current_video_frame == 1) {
+      active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 5 / 4;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      active_worst_quality = cpi->rc.last_q[INTER_FRAME];
+    }
+  } else {
+    if (cpi->common.current_video_frame == 1) {
+      active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2;
+    }
+  }
+  if (active_worst_quality > cpi->rc.worst_quality)
+    active_worst_quality = cpi->rc.worst_quality;
+  return active_worst_quality;
 }
 
 void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
   int target;
   if (!cpi->refresh_alt_ref_frame &&
       (cm->current_video_frame == 0 ||
        cm->frame_flags & FRAMEFLAGS_KEY ||
-       cpi->rc.frames_to_key == 0 ||
+       rc->frames_to_key == 0 ||
        (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
     cm->frame_type = KEY_FRAME;
-    cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
-                                    cpi->rc.frames_to_key == 0;
-    cpi->rc.frames_to_key = cpi->key_frame_frequency;
-    cpi->rc.kf_boost = DEFAULT_KF_BOOST;
-    cpi->rc.source_alt_ref_active = 0;
-    if (cm->current_video_frame == 0) {
-      cpi->rc.active_worst_quality = cpi->rc.worst_quality;
-    } else {
-      // Choose active worst quality twice as large as the last q.
-      cpi->rc.active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
-      if (cpi->rc.active_worst_quality > cpi->rc.worst_quality)
-        cpi->rc.active_worst_quality = cpi->rc.worst_quality;
-    }
+    rc->this_key_frame_forced = cm->current_video_frame != 0 &&
+                                rc->frames_to_key == 0;
+    rc->frames_to_key = cpi->key_frame_frequency;
+    rc->kf_boost = DEFAULT_KF_BOOST;
+    rc->source_alt_ref_active = 0;
   } else {
     cm->frame_type = INTER_FRAME;
-    if (cm->current_video_frame == 1) {
-      cpi->rc.active_worst_quality = cpi->rc.worst_quality;
-    } else {
-      // Choose active worst quality twice as large as the last q.
-      cpi->rc.active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2;
-      if (cpi->rc.active_worst_quality > cpi->rc.worst_quality)
-        cpi->rc.active_worst_quality = cpi->rc.worst_quality;
-    }
   }
-  if (cpi->rc.frames_till_gf_update_due == 0) {
-    cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
-    cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval;
+  if (rc->frames_till_gf_update_due == 0) {
+    rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
+    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
     // NOTE: frames_till_gf_update_due must be <= frames_to_key.
-    if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key)
-      cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key;
+    if (rc->frames_till_gf_update_due > rc->frames_to_key)
+      rc->frames_till_gf_update_due = rc->frames_to_key;
     cpi->refresh_golden_frame = 1;
-    cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
-    cpi->rc.gfu_boost = 2000;
+    rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
+    rc->gfu_boost = DEFAULT_GF_BOOST;
   }
+  cpi->rc.active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
   if (cm->frame_type == KEY_FRAME)
     target = calc_iframe_target_size_one_pass_vbr(cpi);
   else
@@ -1099,13 +1171,15 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
   const RATE_CONTROL *rc = &cpi->rc;
   int active_worst_quality = rc->active_worst_quality;
   // Maximum limit for down adjustment, ~20%.
-  int max_adjustment_down = active_worst_quality / 5;
   // Buffer level below which we push active_worst to worst_quality.
   int critical_level = oxcf->optimal_buffer_level >> 2;
   int adjustment = 0;
   int buff_lvl_step = 0;
+  if (cpi->common.frame_type == KEY_FRAME)
+    return rc->worst_quality;
   if (rc->buffer_level > oxcf->optimal_buffer_level) {
     // Adjust down.
+    int max_adjustment_down = active_worst_quality / 5;
     if (max_adjustment_down) {
       buff_lvl_step = (int)((oxcf->maximum_buffer_size -
           oxcf->optimal_buffer_level) / max_adjustment_down);
@@ -1135,11 +1209,20 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
 static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   const RATE_CONTROL *rc = &cpi->rc;
-  int target = rc->av_per_frame_bandwidth;
-  const int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
-                                   FRAME_OVERHEAD_BITS);
   const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
   const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
+  int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
+                             FRAME_OVERHEAD_BITS);
+  int target = rc->av_per_frame_bandwidth;
+  if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    // Note that for layers, av_per_frame_bandwidth is the cumulative
+    // per-frame-bandwidth. For the target size of this frame, use the
+    // layer average frame size (i.e., non-cumulative per-frame-bw).
+    int current_temporal_layer = cpi->svc.temporal_layer_id;
+    const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer];
+    target = lc->avg_frame_size;
+    min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
+  }
   if (diff > 0) {
     // Lower the target bandwidth for this frame.
     const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
@@ -1149,51 +1232,73 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
     const int pct_high = MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
     target += (target * pct_high) / 200;
   }
-  if (target < min_frame_target)
-    target = min_frame_target;
-  return target;
+  return MAX(min_frame_target, target);
 }
 
 static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
-  int target;
   const RATE_CONTROL *rc = &cpi->rc;
+
   if (cpi->common.current_video_frame == 0) {
-    target = cpi->oxcf.starting_buffer_level / 2;
+    return cpi->oxcf.starting_buffer_level / 2;
   } else {
-    int initial_boost = 32;
+    const int initial_boost = 32;
     int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
     if (rc->frames_since_key < cpi->output_framerate / 2) {
       kf_boost = (int)(kf_boost * rc->frames_since_key /
                        (cpi->output_framerate / 2));
     }
-    target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
+    return ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
   }
-  return target;
+}
+
+void vp9_rc_get_svc_params(VP9_COMP *cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  int target = cpi->rc.av_per_frame_bandwidth;
+  if ((cm->current_video_frame == 0) ||
+      (cm->frame_flags & FRAMEFLAGS_KEY) ||
+      (cpi->oxcf.auto_key && (cpi->rc.frames_since_key %
+                              cpi->key_frame_frequency == 0))) {
+    cm->frame_type = KEY_FRAME;
+    cpi->rc.source_alt_ref_active = 0;
+    if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+      target = calc_iframe_target_size_one_pass_cbr(cpi);
+    }
+  } else {
+    cm->frame_type = INTER_FRAME;
+    if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+      target = calc_pframe_target_size_one_pass_cbr(cpi);
+    }
+  }
+  cpi->rc.active_worst_quality =
+      calc_active_worst_quality_one_pass_cbr(cpi);
+  vp9_rc_set_frame_target(cpi, target);
+  cpi->rc.frames_till_gf_update_due = INT_MAX;
+  cpi->rc.baseline_gf_interval = INT_MAX;
 }
 
 void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
   int target;
   if ((cm->current_video_frame == 0 ||
       cm->frame_flags & FRAMEFLAGS_KEY ||
-      cpi->rc.frames_to_key == 0 ||
+      rc->frames_to_key == 0 ||
       (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
     cm->frame_type = KEY_FRAME;
-    cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
-                                    cpi->rc.frames_to_key == 0;
-    cpi->rc.frames_to_key = cpi->key_frame_frequency;
-    cpi->rc.kf_boost = DEFAULT_KF_BOOST;
-    cpi->rc.source_alt_ref_active = 0;
+    rc->this_key_frame_forced = cm->current_video_frame != 0 &&
+                                rc->frames_to_key == 0;
+    rc->frames_to_key = cpi->key_frame_frequency;
+    rc->kf_boost = DEFAULT_KF_BOOST;
+    rc->source_alt_ref_active = 0;
     target = calc_iframe_target_size_one_pass_cbr(cpi);
-    cpi->rc.active_worst_quality = cpi->rc.worst_quality;
   } else {
     cm->frame_type = INTER_FRAME;
     target = calc_pframe_target_size_one_pass_cbr(cpi);
-    cpi->rc.active_worst_quality =
-        calc_active_worst_quality_one_pass_cbr(cpi);
   }
+  cpi->rc.active_worst_quality =
+      calc_active_worst_quality_one_pass_cbr(cpi);
   vp9_rc_set_frame_target(cpi, target);
   // Don't use gf_update by default in CBR mode.
-  cpi->rc.frames_till_gf_update_due = INT_MAX;
-  cpi->rc.baseline_gf_interval = INT_MAX;
+  rc->frames_till_gf_update_due = INT_MAX;
+  rc->baseline_gf_interval = INT_MAX;
 }
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index e5230feb4..fcfab2a41 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1064,7 +1064,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
         so = &vp9_scan_orders[TX_4X4][tx_type];
 
         if (tx_type != DCT_DCT)
-          vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
+          vp9_fht4x4(src_diff, coeff, 8, tx_type);
         else
           x->fwd_txm4x4(src_diff, coeff, 8);
 
@@ -1645,14 +1645,15 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                     BEST_SEG_INFO *bsi_buf, int filter_idx,
                                     int_mv seg_mvs[4][MAX_REF_FRAMES],
                                     int mi_row, int mi_col) {
-  int i, br = 0, idx, idy;
+  int k, br = 0, idx, idy;
   int64_t bd = 0, block_sse = 0;
   MB_PREDICTION_MODE this_mode;
+  MACROBLOCKD *xd = &x->e_mbd;
   VP9_COMMON *cm = &cpi->common;
-  MODE_INFO *mi = x->e_mbd.mi_8x8[0];
+  MODE_INFO *mi = xd->mi_8x8[0];
   MB_MODE_INFO *const mbmi = &mi->mbmi;
   struct macroblock_plane *const p = &x->plane[0];
-  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
+  struct macroblockd_plane *const pd = &xd->plane[0];
   const int label_count = 4;
   int64_t this_segment_rd = 0;
   int label_mv_thresh;
@@ -1660,7 +1661,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
-  vp9_variance_fn_ptr_t *v_fn_ptr;
+  vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
   ENTROPY_CONTEXT t_above[2], t_left[2];
   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
   int mode_idx;
@@ -1670,8 +1671,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
   vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
 
-  v_fn_ptr = &cpi->fn_ptr[bsize];
-
   // 64 makes this threshold really big effectively
   // making it so that we very rarely check mvs on
   // segments.   setting this to 1 would make mv thresh
@@ -1687,20 +1686,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
       MB_PREDICTION_MODE mode_selected = ZEROMV;
       int64_t best_rd = INT64_MAX;
-      i = idy * 2 + idx;
-
-      frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
-      vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
-                                    i, 0, mi_row, mi_col,
-                                    &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
-                                    &frame_mv[NEARMV][mbmi->ref_frame[0]]);
-      if (has_second_rf) {
-        frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
-        vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
-                                      i, 1, mi_row, mi_col,
-                                      &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
-                                      &frame_mv[NEARMV][mbmi->ref_frame[1]]);
+      const int i = idy * 2 + idx;
+      int ref;
+
+      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+        frame_mv[ZEROMV][frame].as_int = 0;
+        vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
+                                      &frame_mv[NEARESTMV][frame],
+                                      &frame_mv[NEARMV][frame]);
       }
+
       // search for the best motion vector on this segment
       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
         const struct buf_2d orig_src = x->plane[0].src;
@@ -2042,8 +2038,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   bsi->sse = block_sse;
 
   // update the coding decisions
-  for (i = 0; i < 4; ++i)
-    bsi->modes[i] = mi->bmi[i].as_mode;
+  for (k = 0; k < 4; ++k)
+    bsi->modes[k] = mi->bmi[k].as_mode;
 }
 
 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad.c
index 58c5df47e..58c5df47e 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad.c
diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c
index ea031fb07..2b82d9750 100644
--- a/vp9/encoder/x86/vp9_dct_avx2.c
+++ b/vp9/encoder/x86/vp9_dct_avx2.c
@@ -244,32 +244,36 @@ void fadst4_avx2(__m128i *in) {
   transpose_4x4_avx2(in);
 }
 
-void vp9_short_fht4x4_avx2(const int16_t *input, int16_t *output,
-                           int stride, int tx_type) {
+void vp9_fht4x4_avx2(const int16_t *input, int16_t *output,
+                     int stride, int tx_type) {
   __m128i in[4];
-  load_buffer_4x4_avx2(input, in, stride);
+
   switch (tx_type) {
-    case 0:  // DCT_DCT
-      fdct4_avx2(in);
-      fdct4_avx2(in);
+    case DCT_DCT:
+      vp9_fdct4x4_avx2(input, output, stride);
       break;
-    case 1:  // ADST_DCT
+    case ADST_DCT:
+      load_buffer_4x4_avx2(input, in, stride);
       fadst4_avx2(in);
       fdct4_avx2(in);
+      write_buffer_4x4_avx2(output, in);
       break;
-    case 2:  // DCT_ADST
+    case DCT_ADST:
+      load_buffer_4x4_avx2(input, in, stride);
       fdct4_avx2(in);
       fadst4_avx2(in);
+      write_buffer_4x4_avx2(output, in);
       break;
-    case 3:  // ADST_ADST
+    case ADST_ADST:
+      load_buffer_4x4_avx2(input, in, stride);
       fadst4_avx2(in);
       fadst4_avx2(in);
+      write_buffer_4x4_avx2(output, in);
       break;
     default:
       assert(0);
       break;
   }
-  write_buffer_4x4_avx2(output, in);
 }
 
 void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
@@ -1028,33 +1032,39 @@ void fadst8_avx2(__m128i *in) {
   array_transpose_8x8_avx2(in, in);
 }
 
-void vp9_short_fht8x8_avx2(const int16_t *input, int16_t *output,
-                           int stride, int tx_type) {
+void vp9_fht8x8_avx2(const int16_t *input, int16_t *output,
+                     int stride, int tx_type) {
   __m128i in[8];
-  load_buffer_8x8_avx2(input, in, stride);
+
   switch (tx_type) {
-    case 0:  // DCT_DCT
-      fdct8_avx2(in);
-      fdct8_avx2(in);
+    case DCT_DCT:
+      vp9_fdct8x8_avx2(input, output, stride);
       break;
-    case 1:  // ADST_DCT
+    case ADST_DCT:
+      load_buffer_8x8_avx2(input, in, stride);
       fadst8_avx2(in);
       fdct8_avx2(in);
+      right_shift_8x8_avx2(in, 1);
+      write_buffer_8x8_avx2(output, in, 8);
       break;
-    case 2:  // DCT_ADST
+    case DCT_ADST:
+      load_buffer_8x8_avx2(input, in, stride);
       fdct8_avx2(in);
       fadst8_avx2(in);
+      right_shift_8x8_avx2(in, 1);
+      write_buffer_8x8_avx2(output, in, 8);
       break;
-    case 3:  // ADST_ADST
+    case ADST_ADST:
+      load_buffer_8x8_avx2(input, in, stride);
       fadst8_avx2(in);
       fadst8_avx2(in);
+      right_shift_8x8_avx2(in, 1);
+      write_buffer_8x8_avx2(output, in, 8);
       break;
     default:
       assert(0);
       break;
   }
-  right_shift_8x8_avx2(in, 1);
-  write_buffer_8x8_avx2(output, in, 8);
 }
 
 void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
@@ -2534,36 +2544,39 @@ void fadst16_avx2(__m128i *in0, __m128i *in1) {
   array_transpose_16x16_avx2(in0, in1);
 }
 
-void vp9_short_fht16x16_avx2(const int16_t *input, int16_t *output,
-                             int stride, int tx_type) {
+void vp9_fht16x16_avx2(const int16_t *input, int16_t *output,
+                      int stride, int tx_type) {
   __m128i in0[16], in1[16];
-  load_buffer_16x16_avx2(input, in0, in1, stride);
+
   switch (tx_type) {
-    case 0:  // DCT_DCT
-      fdct16_avx2(in0, in1);
-      right_shift_16x16_avx2(in0, in1);
-      fdct16_avx2(in0, in1);
+    case DCT_DCT:
+      vp9_fdct16x16_avx2(input, output, stride);
       break;
-    case 1:  // ADST_DCT
+    case ADST_DCT:
+      load_buffer_16x16_avx2(input, in0, in1, stride);
       fadst16_avx2(in0, in1);
       right_shift_16x16_avx2(in0, in1);
       fdct16_avx2(in0, in1);
+      write_buffer_16x16_avx2(output, in0, in1, 16);
       break;
-    case 2:  // DCT_ADST
+    case DCT_ADST:
+      load_buffer_16x16_avx2(input, in0, in1, stride);
       fdct16_avx2(in0, in1);
       right_shift_16x16_avx2(in0, in1);
       fadst16_avx2(in0, in1);
+      write_buffer_16x16_avx2(output, in0, in1, 16);
       break;
-    case 3:  // ADST_ADST
+    case ADST_ADST:
+      load_buffer_16x16_avx2(input, in0, in1, stride);
       fadst16_avx2(in0, in1);
       right_shift_16x16_avx2(in0, in1);
       fadst16_avx2(in0, in1);
+      write_buffer_16x16_avx2(output, in0, in1, 16);
       break;
     default:
       assert(0);
       break;
   }
-  write_buffer_16x16_avx2(output, in0, in1, 16);
 }
 
 #define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index c876cc273..852cf8667 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -242,32 +242,36 @@ void fadst4_sse2(__m128i *in) {
   transpose_4x4(in);
 }
 
-void vp9_short_fht4x4_sse2(const int16_t *input, int16_t *output,
-                           int stride, int tx_type) {
+void vp9_fht4x4_sse2(const int16_t *input, int16_t *output,
+                     int stride, int tx_type) {
   __m128i in[4];
-  load_buffer_4x4(input, in, stride);
+
   switch (tx_type) {
-    case 0:  // DCT_DCT
-      fdct4_sse2(in);
-      fdct4_sse2(in);
+    case DCT_DCT:
+      vp9_fdct4x4_sse2(input, output, stride);
       break;
-    case 1:  // ADST_DCT
+    case ADST_DCT:
+      load_buffer_4x4(input, in, stride);
       fadst4_sse2(in);
       fdct4_sse2(in);
+      write_buffer_4x4(output, in);
       break;
-    case 2:  // DCT_ADST
+    case DCT_ADST:
+      load_buffer_4x4(input, in, stride);
       fdct4_sse2(in);
       fadst4_sse2(in);
+      write_buffer_4x4(output, in);
       break;
-    case 3:  // ADST_ADST
+    case ADST_ADST:
+      load_buffer_4x4(input, in, stride);
       fadst4_sse2(in);
       fadst4_sse2(in);
+      write_buffer_4x4(output, in);
       break;
-    default:
-      assert(0);
-      break;
+   default:
+     assert(0);
+     break;
   }
-  write_buffer_4x4(output, in);
 }
 
 void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
@@ -1026,33 +1030,39 @@ void fadst8_sse2(__m128i *in) {
   array_transpose_8x8(in, in);
 }
 
-void vp9_short_fht8x8_sse2(const int16_t *input, int16_t *output,
-                           int stride, int tx_type) {
+void vp9_fht8x8_sse2(const int16_t *input, int16_t *output,
+                     int stride, int tx_type) {
   __m128i in[8];
-  load_buffer_8x8(input, in, stride);
+
   switch (tx_type) {
-    case 0:  // DCT_DCT
-      fdct8_sse2(in);
-      fdct8_sse2(in);
+    case DCT_DCT:
+      vp9_fdct8x8_sse2(input, output, stride);
       break;
-    case 1:  // ADST_DCT
+    case ADST_DCT:
+      load_buffer_8x8(input, in, stride);
       fadst8_sse2(in);
       fdct8_sse2(in);
+      right_shift_8x8(in, 1);
+      write_buffer_8x8(output, in, 8);
       break;
-    case 2:  // DCT_ADST
+    case DCT_ADST:
+      load_buffer_8x8(input, in, stride);
       fdct8_sse2(in);
       fadst8_sse2(in);
+      right_shift_8x8(in, 1);
+      write_buffer_8x8(output, in, 8);
       break;
-    case 3:  // ADST_ADST
+    case ADST_ADST:
+      load_buffer_8x8(input, in, stride);
       fadst8_sse2(in);
       fadst8_sse2(in);
+      right_shift_8x8(in, 1);
+      write_buffer_8x8(output, in, 8);
       break;
     default:
       assert(0);
       break;
   }
-  right_shift_8x8(in, 1);
-  write_buffer_8x8(output, in, 8);
 }
 
 void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
@@ -2532,36 +2542,39 @@ void fadst16_sse2(__m128i *in0, __m128i *in1) {
   array_transpose_16x16(in0, in1);
 }
 
-void vp9_short_fht16x16_sse2(const int16_t *input, int16_t *output,
-                             int stride, int tx_type) {
+void vp9_fht16x16_sse2(const int16_t *input, int16_t *output,
+                       int stride, int tx_type) {
   __m128i in0[16], in1[16];
-  load_buffer_16x16(input, in0, in1, stride);
+
   switch (tx_type) {
-    case 0:  // DCT_DCT
-      fdct16_sse2(in0, in1);
-      right_shift_16x16(in0, in1);
-      fdct16_sse2(in0, in1);
+    case DCT_DCT:
+      vp9_fdct16x16_sse2(input, output, stride);
       break;
-    case 1:  // ADST_DCT
+    case ADST_DCT:
+      load_buffer_16x16(input, in0, in1, stride);
       fadst16_sse2(in0, in1);
       right_shift_16x16(in0, in1);
       fdct16_sse2(in0, in1);
+      write_buffer_16x16(output, in0, in1, 16);
       break;
-    case 2:  // DCT_ADST
+    case DCT_ADST:
+      load_buffer_16x16(input, in0, in1, stride);
       fdct16_sse2(in0, in1);
       right_shift_16x16(in0, in1);
       fadst16_sse2(in0, in1);
+      write_buffer_16x16(output, in0, in1, 16);
       break;
-    case 3:  // ADST_ADST
+    case ADST_ADST:
+      load_buffer_16x16(input, in0, in1, stride);
       fadst16_sse2(in0, in1);
       right_shift_16x16(in0, in1);
       fadst16_sse2(in0, in1);
+      write_buffer_16x16(output, in0, in1, 16);
       break;
     default:
       assert(0);
       break;
   }
-  write_buffer_16x16(output, in0, in1, 16);
 }
 
 #define FDCT32x32_2D vp9_fdct32x32_rd_sse2
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index db306603b..48ccef8cc 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -188,7 +188,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   pmaxsw                          m8, m7
   pshuflw                         m7, m8, 0x1
   pmaxsw                          m8, m7
-  pextrw                        [r2], m8, 0
+  pextrw                          r6, m8, 0
+  mov                             [r2], r6
   RET
 
   ; skip-block, i.e. just write all zeroes
@@ -214,5 +215,5 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
 %endmacro
 
 INIT_XMM ssse3
-QUANTIZE_FN b, 6
+QUANTIZE_FN b, 7
 QUANTIZE_FN b_32x32, 7