8 files changed, 200 insertions, 276 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f5d43d659..442170f0c 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -319,7 +319,7 @@ static void build_activity_map(VP9_COMP *cpi) {
 }
 
 // Macroblock activity masking
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
+static void activity_masking(VP9_COMP *cpi, MACROBLOCK *x) {
 #if USE_ACT_INDEX
   x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2);
   x->errorperbit = x->rdmult * 100 / (110 * x->rddiv);
@@ -673,7 +673,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
   }
 
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
-    vp9_activity_masking(cpi, x);
+    activity_masking(cpi, x);
 
   if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
     vp9_clear_system_state();  // __asm emms;
@@ -2188,108 +2188,6 @@ static void switch_tx_mode(VP9_COMP *cpi) {
     cpi->common.tx_mode = ALLOW_32X32;
 }
 
-static void encode_frame_internal(VP9_COMP *cpi) {
-  int mi_row;
-  MACROBLOCK *const x = &cpi->mb;
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-
-//  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
-//           cpi->common.current_video_frame, cpi->common.show_frame,
-//           cm->frame_type);
-
-  vp9_zero(cm->counts.switchable_interp);
-  vp9_zero(cpi->tx_stepdown_count);
-
-  xd->mi_8x8 = cm->mi_grid_visible;
-  // required for vp9_frame_init_quantizer
-  xd->mi_8x8[0] = cm->mi;
-
-  xd->last_mi = cm->prev_mi;
-
-  vp9_zero(cm->counts.mv);
-  vp9_zero(cpi->coef_counts);
-  vp9_zero(cm->counts.eob_branch);
-
-  cpi->mb.e_mbd.lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0
-      && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0;
-  switch_lossless_mode(cpi, cpi->mb.e_mbd.lossless);
-
-  vp9_frame_init_quantizer(cpi);
-
-  vp9_initialize_rd_consts(cpi);
-  vp9_initialize_me_consts(cpi, cm->base_qindex);
-  switch_tx_mode(cpi);
-
-  if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
-    // Initialize encode frame context.
-    init_encode_frame_mb_context(cpi);
-
-    // Build a frame level activity map
-    build_activity_map(cpi);
-  }
-
-  // Re-initialize encode frame context.
-  init_encode_frame_mb_context(cpi);
-
-  vp9_zero(cpi->rd_comp_pred_diff);
-  vp9_zero(cpi->rd_filter_diff);
-  vp9_zero(cpi->rd_tx_select_diff);
-  vp9_zero(cpi->rd_tx_select_threshes);
-
-  set_prev_mi(cm);
-
-  {
-    struct vpx_usec_timer emr_timer;
-    vpx_usec_timer_start(&emr_timer);
-
-    {
-      // Take tiles into account and give start/end MB
-      int tile_col, tile_row;
-      TOKENEXTRA *tp = cpi->tok;
-      const int tile_cols = 1 << cm->log2_tile_cols;
-      const int tile_rows = 1 << cm->log2_tile_rows;
-
-      for (tile_row = 0; tile_row < tile_rows; tile_row++) {
-        for (tile_col = 0; tile_col < tile_cols; tile_col++) {
-          TileInfo tile;
-          TOKENEXTRA *tp_old = tp;
-
-          // For each row of SBs in the frame
-          vp9_tile_init(&tile, cm, tile_row, tile_col);
-          for (mi_row = tile.mi_row_start;
-               mi_row < tile.mi_row_end; mi_row += 8)
-            encode_sb_row(cpi, &tile, mi_row, &tp);
-
-          cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
-          assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
-        }
-      }
-    }
-
-    vpx_usec_timer_mark(&emr_timer);
-    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
-  }
-
-  if (cpi->sf.skip_encode_sb) {
-    int j;
-    unsigned int intra_count = 0, inter_count = 0;
-    for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
-      intra_count += cm->counts.intra_inter[j][0];
-      inter_count += cm->counts.intra_inter[j][1];
-    }
-    cpi->sf.skip_encode_frame = ((intra_count << 2) < inter_count);
-    cpi->sf.skip_encode_frame &= (cm->frame_type != KEY_FRAME);
-    cpi->sf.skip_encode_frame &= cm->show_frame;
-  } else {
-    cpi->sf.skip_encode_frame = 0;
-  }
-
-#if 0
-  // Keep record of the total distortion this time around for future use
-  cpi->last_frame_distortion = cpi->frame_distortion;
-#endif
-}
 
 static int check_dual_ref_flags(VP9_COMP *cpi) {
   const int ref_flags = cpi->ref_frame_flags;
@@ -2579,28 +2477,18 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                      &dummy_rate, &dummy_dist, 1);
   }
 }
+// end RTC play code
 
-
-static void encode_rtc_frame_internal(VP9_COMP *cpi) {
+static void encode_frame_internal(VP9_COMP *cpi) {
   int mi_row;
-  MACROBLOCK * const x = &cpi->mb;
-  VP9_COMMON * const cm = &cpi->common;
-  MACROBLOCKD * const xd = &x->e_mbd;
+  MACROBLOCK *const x = &cpi->mb;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
 
 //  fprintf(stderr, "encode_frame_internal frame %d (%d) type %d\n",
 //           cpi->common.current_video_frame, cpi->common.show_frame,
 //           cm->frame_type);
 
-// debug output
-#if DBG_PRNT_SEGMAP
-  {
-    FILE *statsfile;
-    statsfile = fopen("segmap2.stt", "a");
-    fprintf(statsfile, "\n");
-    fclose(statsfile);
-  }
-#endif
-
   vp9_zero(cm->counts.switchable_interp);
   vp9_zero(cpi->tx_stepdown_count);
 
@@ -2610,7 +2498,7 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
 
   xd->last_mi = cm->prev_mi;
 
-  vp9_zero(cpi->common.counts.mv);
+  vp9_zero(cm->counts.mv);
   vp9_zero(cpi->coef_counts);
   vp9_zero(cm->counts.eob_branch);
 
@@ -2623,7 +2511,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
   vp9_initialize_rd_consts(cpi);
   vp9_initialize_me_consts(cpi, cm->base_qindex);
   switch_tx_mode(cpi);
-  cpi->sf.always_this_block_size = BLOCK_16X16;
 
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
     // Initialize encode frame context.
@@ -2662,9 +2549,12 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
           // For each row of SBs in the frame
           vp9_tile_init(&tile, cm, tile_row, tile_col);
           for (mi_row = tile.mi_row_start;
-               mi_row < tile.mi_row_end; mi_row += 8)
-            encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
-
+               mi_row < tile.mi_row_end; mi_row += 8) {
+            if (cpi->sf.super_fast_rtc)
+              encode_rtc_sb_row(cpi, &tile, mi_row, &tp);
+            else
+              encode_sb_row(cpi, &tile, mi_row, &tp);
+          }
           cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
           assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
         }
@@ -2694,8 +2584,6 @@ static void encode_rtc_frame_internal(VP9_COMP *cpi) {
   cpi->last_frame_distortion = cpi->frame_distortion;
 #endif
 }
-// end RTC play code
-
 
 void vp9_encode_frame(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
@@ -2770,10 +2658,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
     select_tx_mode(cpi);
     cm->reference_mode = reference_mode;
 
-    if (cpi->sf.super_fast_rtc)
-      encode_rtc_frame_internal(cpi);
-    else
-      encode_frame_internal(cpi);
+    encode_frame_internal(cpi);
 
     for (i = 0; i < REFERENCE_MODES; ++i) {
       const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
@@ -2853,10 +2738,7 @@ void vp9_encode_frame(VP9_COMP *cpi) {
   } else {
     // Force the usage of the BILINEAR interp_filter.
     cm->interp_filter = BILINEAR;
-    if (cpi->sf.super_fast_rtc)
-      encode_rtc_frame_internal(cpi);
-    else
-      encode_frame_internal(cpi);
+    encode_frame_internal(cpi);
   }
 }
 
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 838f74e8c..dc35044d6 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -902,8 +902,8 @@ static double calc_correction_factor(double err_per_mb,
   return fclamp(pow(error_term, power_term), 0.05, 5.0);
 }
 
-static int estimate_max_q(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
-                          int section_target_bandwitdh) {
+int vp9_twopass_worst_quality(VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+                              int section_target_bandwitdh) {
   int q;
   const int num_mbs = cpi->common.MBs;
   int target_norm_bits_per_mb;
@@ -2280,8 +2280,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
     // Special case code for first frame.
     const int section_target_bandwidth = (int)(twopass->bits_left /
                                                frames_left);
-    const int tmp_q = estimate_max_q(cpi, &twopass->total_left_stats,
-                                     section_target_bandwidth);
+    const int tmp_q = vp9_twopass_worst_quality(cpi, &twopass->total_left_stats,
+                                                section_target_bandwidth);
 
     rc->active_worst_quality = tmp_q;
     rc->ni_av_qi = tmp_q;
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index 054ecf811..7e612183e 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -88,6 +88,8 @@ void vp9_end_first_pass(struct VP9_COMP *cpi);
 void vp9_init_second_pass(struct VP9_COMP *cpi);
 void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
 void vp9_end_second_pass(struct VP9_COMP *cpi);
+int vp9_twopass_worst_quality(struct VP9_COMP *cpi, FIRSTPASS_STATS *fpstats,
+                              int section_target_bandwitdh);
 
 // Post encode update of the rate control parameters for 2-pass
 void vp9_twopass_postencode_update(struct VP9_COMP *cpi,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 917d3a4a2..a81aa0a71 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -846,6 +846,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
   }
   if (speed >= 6) {
     sf->super_fast_rtc = 1;
+    sf->always_this_block_size = BLOCK_16X16;
   }
 }
 
@@ -1155,14 +1156,17 @@ static void init_layer_context(VP9_COMP *const cpi) {
     LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
     RATE_CONTROL *const lrc = &lc->rc;
     lrc->active_worst_quality = q_trans[oxcf->worst_allowed_q];
-    lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
-    lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q];
+    lrc->avg_frame_qindex[INTER_FRAME] = lrc->active_worst_quality;
+    lrc->last_q[INTER_FRAME] = lrc->active_worst_quality;
     lrc->ni_av_qi = lrc->active_worst_quality;
     lrc->total_actual_bits = 0;
     lrc->total_target_vs_actual = 0;
     lrc->ni_tot_qi = 0;
     lrc->tot_q = 0.0;
+    lrc->avg_q = 0.0;
     lrc->ni_frames = 0;
+    lrc->decimation_count = 0;
+    lrc->decimation_factor = 0;
     lrc->rate_correction_factor = 1.0;
     lrc->key_frame_rate_correction_factor = 1.0;
     lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] *
@@ -1207,13 +1211,24 @@ static void update_layer_context_change_config(VP9_COMP *const cpi,
 // for the current layer.
 static void update_layer_framerate(VP9_COMP *const cpi) {
   int temporal_layer = cpi->svc.temporal_layer_id;
+  const VP9_CONFIG *const oxcf = &cpi->oxcf;
   LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer];
   RATE_CONTROL *const lrc = &lc->rc;
-  lc->framerate = cpi->oxcf.framerate /
-      cpi->oxcf.ts_rate_decimator[temporal_layer];
-  lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth /
-      lc->framerate);
+  lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer];
+  lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
   lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
+  // Update the average layer frame size (non-cumulative per-frame-bw).
+  if (temporal_layer == 0) {
+    lc->avg_frame_size = lrc->av_per_frame_bandwidth;
+  } else {
+    double prev_layer_framerate = oxcf->framerate /
+        oxcf->ts_rate_decimator[temporal_layer - 1];
+    int prev_layer_target_bandwidth =
+        oxcf->ts_target_bitrate[temporal_layer - 1] * 1000;
+    lc->avg_frame_size =
+        (int)(lc->target_bandwidth - prev_layer_target_bandwidth) /
+        (lc->framerate - prev_layer_framerate);
+  }
 }
 
 // Prior to encoding the frame, set the layer context, for the current layer
@@ -1729,11 +1744,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) {
                                sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
   }
 
-#ifdef ENTROPY_STATS
-  if (cpi->pass != 1)
-    init_context_counters();
-#endif
-
   /*Initialize the feed-forward activity masking.*/
   cpi->activity_avg = 90 << 12;
   cpi->key_frame_frequency = cpi->oxcf.key_freq;
@@ -2767,10 +2777,10 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
 static void encode_without_recode_loop(VP9_COMP *cpi,
                                        size_t *size,
                                        uint8_t *dest,
-                                       int *q) {
+                                       int q) {
   VP9_COMMON *const cm = &cpi->common;
   vp9_clear_system_state();  // __asm emms;
-  vp9_set_quantizer(cpi, *q);
+  vp9_set_quantizer(cpi, q);
 
   // Set up entropy context depending on frame type. The decoder mandates
   // the use of the default context, index 0, for keyframes and inter
@@ -2804,7 +2814,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
 static void encode_with_recode_loop(VP9_COMP *cpi,
                                     size_t *size,
                                     uint8_t *dest,
-                                    int *q,
+                                    int q,
                                     int bottom_index,
                                     int top_index) {
   VP9_COMMON *const cm = &cpi->common;
@@ -2824,7 +2834,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
   do {
     vp9_clear_system_state();  // __asm emms;
 
-    vp9_set_quantizer(cpi, *q);
+    vp9_set_quantizer(cpi, q);
 
     if (loop_count == 0) {
       // Set up entropy context depending on frame type. The decoder mandates
@@ -2881,7 +2891,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
       if ((cm->frame_type == KEY_FRAME) &&
            cpi->rc.this_key_frame_forced &&
            (cpi->rc.projected_frame_size < cpi->rc.max_frame_bandwidth)) {
-        int last_q = *q;
+        int last_q = q;
         int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
 
         int high_err_target = cpi->ambient_err;
@@ -2897,32 +2907,32 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
             (kf_err > low_err_target &&
              cpi->rc.projected_frame_size <= frame_under_shoot_limit)) {
           // Lower q_high
-          q_high = *q > q_low ? *q - 1 : q_low;
+          q_high = q > q_low ? q - 1 : q_low;
 
           // Adjust Q
-          *q = ((*q) * high_err_target) / kf_err;
-          *q = MIN((*q), (q_high + q_low) >> 1);
+          q = (q * high_err_target) / kf_err;
+          q = MIN(q, (q_high + q_low) >> 1);
         } else if (kf_err < low_err_target &&
                    cpi->rc.projected_frame_size >= frame_under_shoot_limit) {
           // The key frame is much better than the previous frame
           // Raise q_low
-          q_low = *q < q_high ? *q + 1 : q_high;
+          q_low = q < q_high ? q + 1 : q_high;
 
           // Adjust Q
-          *q = ((*q) * low_err_target) / kf_err;
-          *q = MIN((*q), (q_high + q_low + 1) >> 1);
+          q = (q * low_err_target) / kf_err;
+          q = MIN(q, (q_high + q_low + 1) >> 1);
         }
 
         // Clamp Q to upper and lower limits:
-        *q = clamp(*q, q_low, q_high);
+        q = clamp(q, q_low, q_high);
 
-        loop = *q != last_q;
+        loop = q != last_q;
       } else if (recode_loop_test(
           cpi, frame_over_shoot_limit, frame_under_shoot_limit,
-          *q, MAX(q_high, top_index), bottom_index)) {
+          q, MAX(q_high, top_index), bottom_index)) {
         // Is the projected frame size out of range and are we allowed
         // to attempt to recode.
-        int last_q = *q;
+        int last_q = q;
         int retries = 0;
 
         // Frame size out of permitted range:
@@ -2935,23 +2945,23 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
             q_high = cpi->rc.worst_quality;
 
           // Raise Qlow as to at least the current value
-          q_low = *q < q_high ? *q + 1 : q_high;
+          q_low = q < q_high ? q + 1 : q_high;
 
           if (undershoot_seen || loop_count > 1) {
             // Update rate_correction_factor unless
             vp9_rc_update_rate_correction_factors(cpi, 1);
 
-            *q = (q_high + q_low + 1) / 2;
+            q = (q_high + q_low + 1) / 2;
           } else {
             // Update rate_correction_factor unless
             vp9_rc_update_rate_correction_factors(cpi, 0);
 
-            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+            q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                    bottom_index, MAX(q_high, top_index));
 
-            while (*q < q_low && retries < 10) {
+            while (q < q_low && retries < 10) {
               vp9_rc_update_rate_correction_factors(cpi, 0);
-              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+              q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                      bottom_index, MAX(q_high, top_index));
               retries++;
             }
@@ -2960,27 +2970,27 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
           overshoot_seen = 1;
         } else {
           // Frame is too small
-          q_high = *q > q_low ? *q - 1 : q_low;
+          q_high = q > q_low ? q - 1 : q_low;
 
           if (overshoot_seen || loop_count > 1) {
             vp9_rc_update_rate_correction_factors(cpi, 1);
-            *q = (q_high + q_low) / 2;
+            q = (q_high + q_low) / 2;
           } else {
             vp9_rc_update_rate_correction_factors(cpi, 0);
-            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+            q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                    bottom_index, top_index);
             // Special case reset for qlow for constrained quality.
             // This should only trigger where there is very substantial
             // undershoot on a frame and the auto cq level is above
             // the user passsed in value.
             if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
-                *q < q_low) {
-              q_low = *q;
+                q < q_low) {
+              q_low = q;
             }
 
-            while (*q > q_high && retries < 10) {
+            while (q > q_high && retries < 10) {
               vp9_rc_update_rate_correction_factors(cpi, 0);
-              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
+              q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target,
                                      bottom_index, top_index);
               retries++;
             }
@@ -2990,9 +3000,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
         }
 
         // Clamp Q to upper and lower limits:
-        *q = clamp(*q, q_low, q_high);
+        q = clamp(q, q_low, q_high);
 
-        loop = *q != last_q;
+        loop = q != last_q;
       } else {
         loop = 0;
       }
@@ -3210,9 +3220,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   }
 
   if (cpi->sf.recode_loop == DISALLOW_RECODE) {
-    encode_without_recode_loop(cpi, size, dest, &q);
+    encode_without_recode_loop(cpi, size, dest, q);
   } else {
-    encode_with_recode_loop(cpi, size, dest, &q, bottom_index, top_index);
+    encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index);
   }
 
   // Special case code to reduce pulsing when key frames are forced at a
@@ -3272,10 +3282,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     }
   }
 
-#ifdef ENTROPY_STATS
-  vp9_update_mode_context_stats(cpi);
-#endif
-
 #if 0
   output_frame_level_debug_stats(cpi);
 #endif
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 5fd8e5a54..90ed606f6 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -45,7 +45,7 @@ extern "C" {
 #else
 #define MIN_GF_INTERVAL             4
 #endif
-#define DEFAULT_GF_INTERVAL         11
+#define DEFAULT_GF_INTERVAL         10
 #define DEFAULT_KF_BOOST            2000
 #define DEFAULT_GF_BOOST            2000
 
@@ -401,6 +401,7 @@ typedef struct {
   int64_t optimal_buffer_level;
   int64_t maximum_buffer_size;
   double framerate;
+  int avg_frame_size;
 } LAYER_CONTEXT;
 
 typedef struct VP9_COMP {
@@ -691,8 +692,6 @@ void vp9_encode_frame(VP9_COMP *cpi);
 
 void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
 
-void vp9_activity_masking(VP9_COMP *cpi, MACROBLOCK *x);
-
 void vp9_set_speed_features(VP9_COMP *cpi);
 
 int vp9_calc_ss_err(const YV12_BUFFER_CONFIG *source,
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index b2581072e..b3e9f4538 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -215,7 +215,7 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
                                    rc->av_per_frame_bandwidth >> 5);
   if (target < min_frame_target)
     target = min_frame_target;
-  if (cpi->refresh_golden_frame && rc->source_alt_ref_active) {
+  if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) {
     // If there is an active ARF at this location use the minimum
     // bits on this frame even if it is a constructed arf.
     // The active maximum quantizer insures that an appropriate
@@ -487,8 +487,7 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
       double q_adj_factor = 1.0;
       double q_val;
 
-      // Baseline value derived from cpi->active_worst_quality and kf boost
-      active_best_quality = get_active_quality(active_worst_quality,
+      active_best_quality = get_active_quality(rc->avg_frame_qindex[KEY_FRAME],
                                                rc->kf_boost,
                                                kf_low, kf_high,
                                                kf_low_motion_minq,
@@ -521,7 +520,8 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
         rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
       q = rc->avg_frame_qindex[INTER_FRAME];
     } else {
-      q = active_worst_quality;
+      q = (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) ?
+          active_worst_quality : rc->avg_frame_qindex[KEY_FRAME];
     }
     // For constrained quality dont allow Q less than the cq level
     if (oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) {
@@ -565,10 +565,24 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi,
       active_best_quality = cpi->cq_target_quality;
     } else {
       // Use the lower of active_worst_quality and recent/average Q.
-      if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
-       active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
-      else
-        active_best_quality = inter_minq[active_worst_quality];
+      if (oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+        if (cm->current_video_frame > 1) {
+          if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
+            active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+          else
+            active_best_quality = inter_minq[active_worst_quality];
+        } else {
+          if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality)
+            active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+          else
+            active_best_quality = inter_minq[active_worst_quality];
+        }
+      } else {
+        if (cm->current_video_frame > 1)
+          active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]];
+        else
+          active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]];
+      }
       // For the constrained quality mode we don't want
       // q to fall below the cq level.
       if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) &&
@@ -973,7 +987,8 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
     rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(
         3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2);
   } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+      (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
+      !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) {
     rc->last_q[2] = cm->base_qindex;
     rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(
         3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2);
@@ -1056,7 +1071,7 @@ static int test_for_kf_one_pass(VP9_COMP *cpi) {
 #define USE_ALTREF_FOR_ONE_PASS   1
 
 static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
-  static const int af_ratio = 5;
+  static const int af_ratio = 10;
   const RATE_CONTROL *rc = &cpi->rc;
   int target;
 #if USE_ALTREF_FOR_ONE_PASS
@@ -1073,55 +1088,71 @@ static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
 }
 
 static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
-  static const int kf_ratio = 12;
+  static const int kf_ratio = 25;
   const RATE_CONTROL *rc = &cpi->rc;
   int target = rc->av_per_frame_bandwidth * kf_ratio;
   return vp9_rc_clamp_iframe_target_size(cpi, target);
 }
 
+static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) {
+  int active_worst_quality;
+  if (cpi->common.frame_type == KEY_FRAME) {
+    if (cpi->common.current_video_frame == 0) {
+      active_worst_quality = cpi->rc.worst_quality;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+    }
+  } else if (!cpi->rc.is_src_frame_alt_ref &&
+             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+    if (cpi->common.current_video_frame == 1) {
+      active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 5 / 4;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      active_worst_quality = cpi->rc.last_q[INTER_FRAME];
+    }
+  } else {
+    if (cpi->common.current_video_frame == 1) {
+      active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
+    } else {
+      // Choose active worst quality twice as large as the last q.
+      active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2;
+    }
+  }
+  if (active_worst_quality > cpi->rc.worst_quality)
+    active_worst_quality = cpi->rc.worst_quality;
+  return active_worst_quality;
+}
+
 void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
   int target;
   if (!cpi->refresh_alt_ref_frame &&
       (cm->current_video_frame == 0 ||
        cm->frame_flags & FRAMEFLAGS_KEY ||
-       cpi->rc.frames_to_key == 0 ||
+       rc->frames_to_key == 0 ||
        (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
     cm->frame_type = KEY_FRAME;
-    cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
-                                    cpi->rc.frames_to_key == 0;
-    cpi->rc.frames_to_key = cpi->key_frame_frequency;
-    cpi->rc.kf_boost = DEFAULT_KF_BOOST;
-    cpi->rc.source_alt_ref_active = 0;
-    if (cm->current_video_frame == 0) {
-      cpi->rc.active_worst_quality = cpi->rc.worst_quality;
-    } else {
-      // Choose active worst quality twice as large as the last q.
-      cpi->rc.active_worst_quality = cpi->rc.last_q[KEY_FRAME] * 2;
-      if (cpi->rc.active_worst_quality > cpi->rc.worst_quality)
-        cpi->rc.active_worst_quality = cpi->rc.worst_quality;
-    }
+    rc->this_key_frame_forced = cm->current_video_frame != 0 &&
+                                rc->frames_to_key == 0;
+    rc->frames_to_key = cpi->key_frame_frequency;
+    rc->kf_boost = DEFAULT_KF_BOOST;
+    rc->source_alt_ref_active = 0;
   } else {
     cm->frame_type = INTER_FRAME;
-    if (cm->current_video_frame == 1) {
-      cpi->rc.active_worst_quality = cpi->rc.worst_quality;
-    } else {
-      // Choose active worst quality twice as large as the last q.
-      cpi->rc.active_worst_quality = cpi->rc.last_q[INTER_FRAME] * 2;
-      if (cpi->rc.active_worst_quality > cpi->rc.worst_quality)
-        cpi->rc.active_worst_quality = cpi->rc.worst_quality;
-    }
   }
-  if (cpi->rc.frames_till_gf_update_due == 0) {
-    cpi->rc.baseline_gf_interval = DEFAULT_GF_INTERVAL;
-    cpi->rc.frames_till_gf_update_due = cpi->rc.baseline_gf_interval;
+  if (rc->frames_till_gf_update_due == 0) {
+    rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
+    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
     // NOTE: frames_till_gf_update_due must be <= frames_to_key.
-    if (cpi->rc.frames_till_gf_update_due > cpi->rc.frames_to_key)
-      cpi->rc.frames_till_gf_update_due = cpi->rc.frames_to_key;
+    if (rc->frames_till_gf_update_due > rc->frames_to_key)
+      rc->frames_till_gf_update_due = rc->frames_to_key;
     cpi->refresh_golden_frame = 1;
-    cpi->rc.source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
-    cpi->rc.gfu_boost = DEFAULT_GF_BOOST;
+    rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
+    rc->gfu_boost = DEFAULT_GF_BOOST;
   }
+  cpi->rc.active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi);
   if (cm->frame_type == KEY_FRAME)
     target = calc_iframe_target_size_one_pass_vbr(cpi);
   else
@@ -1140,13 +1171,15 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
   const RATE_CONTROL *rc = &cpi->rc;
   int active_worst_quality = rc->active_worst_quality;
   // Maximum limit for down adjustment, ~20%.
-  int max_adjustment_down = active_worst_quality / 5;
   // Buffer level below which we push active_worst to worst_quality.
   int critical_level = oxcf->optimal_buffer_level >> 2;
   int adjustment = 0;
   int buff_lvl_step = 0;
+  if (cpi->common.frame_type == KEY_FRAME)
+    return rc->worst_quality;
   if (rc->buffer_level > oxcf->optimal_buffer_level) {
     // Adjust down.
+    int max_adjustment_down = active_worst_quality / 5;
     if (max_adjustment_down) {
       buff_lvl_step = (int)((oxcf->maximum_buffer_size -
           oxcf->optimal_buffer_level) / max_adjustment_down);
@@ -1176,11 +1209,20 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
 static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
   const VP9_CONFIG *oxcf = &cpi->oxcf;
   const RATE_CONTROL *rc = &cpi->rc;
-  int target = rc->av_per_frame_bandwidth;
-  const int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
-                                   FRAME_OVERHEAD_BITS);
   const int64_t diff = oxcf->optimal_buffer_level - rc->buffer_level;
   const int one_pct_bits = 1 + oxcf->optimal_buffer_level / 100;
+  int min_frame_target = MAX(rc->av_per_frame_bandwidth >> 4,
+                             FRAME_OVERHEAD_BITS);
+  int target = rc->av_per_frame_bandwidth;
+  if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+    // Note that for layers, av_per_frame_bandwidth is the cumulative
+    // per-frame-bandwidth. For the target size of this frame, use the
+    // layer average frame size (i.e., non-cumulative per-frame-bw).
+    int current_temporal_layer = cpi->svc.temporal_layer_id;
+    const LAYER_CONTEXT *lc = &cpi->svc.layer_context[current_temporal_layer];
+    target = lc->avg_frame_size;
+    min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
+  }
   if (diff > 0) {
     // Lower the target bandwidth for this frame.
     const int pct_low = MIN(diff / one_pct_bits, oxcf->under_shoot_pct);
@@ -1190,26 +1232,23 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
     const int pct_high = MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
     target += (target * pct_high) / 200;
   }
-  if (target < min_frame_target)
-    target = min_frame_target;
-  return target;
+  return MAX(min_frame_target, target);
 }
 
 static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
-  int target;
   const RATE_CONTROL *rc = &cpi->rc;
+
   if (cpi->common.current_video_frame == 0) {
-    target = cpi->oxcf.starting_buffer_level / 2;
+    return cpi->oxcf.starting_buffer_level / 2;
   } else {
-    int initial_boost = 32;
+    const int initial_boost = 32;
     int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
     if (rc->frames_since_key < cpi->output_framerate / 2) {
       kf_boost = (int)(kf_boost * rc->frames_since_key /
                        (cpi->output_framerate / 2));
     }
-    target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
+    return ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
   }
-  return target;
 }
 
 void vp9_rc_get_svc_params(VP9_COMP *cpi) {
@@ -1223,16 +1262,15 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
     cpi->rc.source_alt_ref_active = 0;
     if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
       target = calc_iframe_target_size_one_pass_cbr(cpi);
-      cpi->rc.active_worst_quality = cpi->rc.worst_quality;
     }
   } else {
     cm->frame_type = INTER_FRAME;
     if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
       target = calc_pframe_target_size_one_pass_cbr(cpi);
-      cpi->rc.active_worst_quality =
-          calc_active_worst_quality_one_pass_cbr(cpi);
     }
   }
+  cpi->rc.active_worst_quality =
+      calc_active_worst_quality_one_pass_cbr(cpi);
   vp9_rc_set_frame_target(cpi, target);
   cpi->rc.frames_till_gf_update_due = INT_MAX;
   cpi->rc.baseline_gf_interval = INT_MAX;
@@ -1240,27 +1278,27 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
 
 void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
+  RATE_CONTROL *const rc = &cpi->rc;
   int target;
   if ((cm->current_video_frame == 0 ||
       cm->frame_flags & FRAMEFLAGS_KEY ||
-      cpi->rc.frames_to_key == 0 ||
+      rc->frames_to_key == 0 ||
       (cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
     cm->frame_type = KEY_FRAME;
-    cpi->rc.this_key_frame_forced = cm->current_video_frame != 0 &&
-                                    cpi->rc.frames_to_key == 0;
-    cpi->rc.frames_to_key = cpi->key_frame_frequency;
-    cpi->rc.kf_boost = DEFAULT_KF_BOOST;
-    cpi->rc.source_alt_ref_active = 0;
+    rc->this_key_frame_forced = cm->current_video_frame != 0 &&
+                                rc->frames_to_key == 0;
+    rc->frames_to_key = cpi->key_frame_frequency;
+    rc->kf_boost = DEFAULT_KF_BOOST;
+    rc->source_alt_ref_active = 0;
     target = calc_iframe_target_size_one_pass_cbr(cpi);
-    cpi->rc.active_worst_quality = cpi->rc.worst_quality;
   } else {
     cm->frame_type = INTER_FRAME;
     target = calc_pframe_target_size_one_pass_cbr(cpi);
-    cpi->rc.active_worst_quality =
-        calc_active_worst_quality_one_pass_cbr(cpi);
   }
+  cpi->rc.active_worst_quality =
+      calc_active_worst_quality_one_pass_cbr(cpi);
   vp9_rc_set_frame_target(cpi, target);
   // Don't use gf_update by default in CBR mode.
-  cpi->rc.frames_till_gf_update_due = INT_MAX;
-  cpi->rc.baseline_gf_interval = INT_MAX;
+  rc->frames_till_gf_update_due = INT_MAX;
+  rc->baseline_gf_interval = INT_MAX;
 }
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 7b17b8582..fcfab2a41 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1645,14 +1645,15 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                     BEST_SEG_INFO *bsi_buf, int filter_idx,
                                     int_mv seg_mvs[4][MAX_REF_FRAMES],
                                     int mi_row, int mi_col) {
-  int i, br = 0, idx, idy;
+  int k, br = 0, idx, idy;
   int64_t bd = 0, block_sse = 0;
   MB_PREDICTION_MODE this_mode;
+  MACROBLOCKD *xd = &x->e_mbd;
   VP9_COMMON *cm = &cpi->common;
-  MODE_INFO *mi = x->e_mbd.mi_8x8[0];
+  MODE_INFO *mi = xd->mi_8x8[0];
   MB_MODE_INFO *const mbmi = &mi->mbmi;
   struct macroblock_plane *const p = &x->plane[0];
-  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
+  struct macroblockd_plane *const pd = &xd->plane[0];
   const int label_count = 4;
   int64_t this_segment_rd = 0;
   int label_mv_thresh;
@@ -1660,7 +1661,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   const BLOCK_SIZE bsize = mbmi->sb_type;
   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
-  vp9_variance_fn_ptr_t *v_fn_ptr;
+  vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
   ENTROPY_CONTEXT t_above[2], t_left[2];
   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
   int mode_idx;
@@ -1670,8 +1671,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
   vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
 
-  v_fn_ptr = &cpi->fn_ptr[bsize];
-
   // 64 makes this threshold really big effectively
   // making it so that we very rarely check mvs on
   // segments.   setting this to 1 would make mv thresh
@@ -1687,20 +1686,17 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
       MB_PREDICTION_MODE mode_selected = ZEROMV;
       int64_t best_rd = INT64_MAX;
-      i = idy * 2 + idx;
-
-      frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
-      vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
-                                    i, 0, mi_row, mi_col,
-                                    &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
-                                    &frame_mv[NEARMV][mbmi->ref_frame[0]]);
-      if (has_second_rf) {
-        frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
-        vp9_append_sub8x8_mvs_for_idx(cm, &x->e_mbd, tile,
-                                      i, 1, mi_row, mi_col,
-                                      &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
-                                      &frame_mv[NEARMV][mbmi->ref_frame[1]]);
+      const int i = idy * 2 + idx;
+      int ref;
+
+      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+        const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
+        frame_mv[ZEROMV][frame].as_int = 0;
+        vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
+                                      &frame_mv[NEARESTMV][frame],
+                                      &frame_mv[NEARMV][frame]);
       }
+
       // search for the best motion vector on this segment
       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
         const struct buf_2d orig_src = x->plane[0].src;
@@ -2042,8 +2038,8 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   bsi->sse = block_sse;
 
   // update the coding decisions
-  for (i = 0; i < 4; ++i)
-    bsi->modes[i] = mi->bmi[i].as_mode;
+  for (k = 0; k < 4; ++k)
+    bsi->modes[k] = mi->bmi[k].as_mode;
 }
 
 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index db306603b..48ccef8cc 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -188,7 +188,8 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
   pmaxsw                          m8, m7
   pshuflw                         m7, m8, 0x1
   pmaxsw                          m8, m7
-  pextrw                        [r2], m8, 0
+  pextrw                          r6, m8, 0
+  mov                             [r2], r6
   RET
 
   ; skip-block, i.e. just write all zeroes
@@ -214,5 +215,5 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \
 %endmacro
 
 INIT_XMM ssse3
-QUANTIZE_FN b, 6
+QUANTIZE_FN b, 7
 QUANTIZE_FN b_32x32, 7