6 files changed, 350 insertions, 365 deletions
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 930e2f82b..50d803680 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -76,6 +76,19 @@ static int select_cq_level(int qindex) {
   return ret_val;
 }
 
+static int gfboost_qadjust(int qindex) {
+  const double q = vp9_convert_qindex_to_q(qindex);
+  return (int)((0.00000828 * q * q * q) +
+               (-0.0055 * q * q) +
+               (1.32 * q) + 79.3);
+}
+
+static int kfboost_qadjust(int qindex) {
+  const double q = vp9_convert_qindex_to_q(qindex);
+  return (int)((0.00000973 * q * q * q) +
+               (-0.00613 * q * q) +
+               (1.316 * q) + 121.2);
+}
 
 // Resets the first pass file to the given position using a relative seek from
 // the current position.
@@ -928,11 +941,11 @@ static int64_t estimate_modemvcost(VP9_COMP *cpi,
   intra_cost = bitcost(av_intra);
 
   // Estimate of extra bits per mv overhead for mbs
-  // << 9 is the normalization to the (bits * 512) used in vp9_bits_per_mb
+  // << 9 is the normalization to the (bits * 512) used in vp9_rc_bits_per_mb
   mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9;
 
   // Crude estimate of overhead cost from modes
-  // << 9 is the normalization to (bits * 512) used in vp9_bits_per_mb
+  // << 9 is the normalization to (bits * 512) used in vp9_rc_bits_per_mb
   mode_cost =
     (int)((((av_pct_inter - av_pct_motion) * zz_cost) +
            (av_pct_motion * motion_cost) +
@@ -1052,8 +1065,8 @@ static int estimate_max_q(VP9_COMP *cpi,
                                 sr_correction * speed_correction *
                                 cpi->twopass.est_max_qcorrection_factor;
 
-    bits_per_mb_at_this_q = vp9_bits_per_mb(INTER_FRAME, q,
-                                            err_correction_factor);
+    bits_per_mb_at_this_q = vp9_rc_bits_per_mb(INTER_FRAME, q,
+                                               err_correction_factor);
 
     if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
       break;
@@ -1140,7 +1153,7 @@ static int estimate_cq(VP9_COMP *cpi,
       sr_correction * speed_correction * clip_iifactor;
 
     bits_per_mb_at_this_q =
-      vp9_bits_per_mb(INTER_FRAME, q, err_correction_factor);
+      vp9_rc_bits_per_mb(INTER_FRAME, q, err_correction_factor);
 
     if (bits_per_mb_at_this_q <= target_norm_bits_per_mb)
       break;
@@ -1936,7 +1949,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
     int q = cpi->rc.last_q[INTER_FRAME];
     int gf_bits;
 
-    int boost = (cpi->rc.gfu_boost * vp9_gfboost_qadjust(q)) / 100;
+    int boost = (cpi->rc.gfu_boost * gfboost_qadjust(q)) / 100;
 
     // Set max and minimum boost and hence minimum allocation
     boost = clamp(boost, 125, (cpi->rc.baseline_gf_interval + 1) * 200);
@@ -2728,3 +2741,21 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   // sizes.
   cpi->twopass.modified_error_left -= kf_group_err;
 }
+
+void vp9_twopass_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
+#ifdef DISABLE_RC_LONG_TERM_MEM
+  cpi->twopass.bits_left -=  cpi->rc.this_frame_target;
+#else
+  cpi->twopass.bits_left -= 8 * bytes_used;
+#endif
+  if (!cpi->refresh_alt_ref_frame) {
+    double lower_bounds_min_rate = FRAME_OVERHEAD_BITS * cpi->oxcf.framerate;
+    double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth *
+                                        cpi->oxcf.two_pass_vbrmin_section
+                                        / 100);
+    if (two_pass_min_rate < lower_bounds_min_rate)
+      two_pass_min_rate = lower_bounds_min_rate;
+    cpi->twopass.bits_left += (int64_t)(two_pass_min_rate /
+                                        cpi->oxcf.framerate);
+  }
+}
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 7ba5bc47d..b0314475f 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -161,7 +161,7 @@ void vp9_initialize_enc() {
     vp9_tokenize_initialize();
     vp9_init_quant_tables();
     vp9_init_me_luts();
-    vp9_init_minq_luts();
+    vp9_rc_init_minq_luts();
     // init_base_skip_probs();
     vp9_entropy_mv_init();
     vp9_entropy_mode_init();
@@ -268,8 +268,8 @@ int vp9_compute_qdelta_by_rate(VP9_COMP *cpi,
   vp9_clear_system_state();
 
   // Look up the current projected bits per block for the base index
-  base_bits_per_mb = vp9_bits_per_mb(cpi->common.frame_type,
-                                     base_q_index, 1.0);
+  base_bits_per_mb = vp9_rc_bits_per_mb(cpi->common.frame_type,
+                                        base_q_index, 1.0);
 
   // Find the target bits per mb based on the base value and given ratio.
   target_bits_per_mb = rate_target_ratio * base_bits_per_mb;
@@ -277,8 +277,8 @@ int vp9_compute_qdelta_by_rate(VP9_COMP *cpi,
   // Convert the q target to an index
   for (i = cpi->rc.best_quality; i < cpi->rc.worst_quality; i++) {
     target_index = i;
-    if (vp9_bits_per_mb(cpi->common.frame_type,
-                        i, 1.0) <= target_bits_per_mb )
+    if (vp9_rc_bits_per_mb(cpi->common.frame_type,
+                           i, 1.0) <= target_bits_per_mb )
       break;
   }
 
@@ -2686,7 +2686,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
 static void encode_with_recode_loop(VP9_COMP *cpi,
                                     unsigned long *size,
                                     uint8_t *dest,
-                                    int q,
+                                    int *q,
                                     int bottom_index,
                                     int top_index,
                                     int frame_over_shoot_limit,
@@ -2700,7 +2700,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
   do {
     vp9_clear_system_state();  // __asm emms;
 
-    vp9_set_quantizer(cpi, q);
+    vp9_set_quantizer(cpi, *q);
 
     if (loop_count == 0) {
       // Set up entropy context depending on frame type. The decoder mandates
@@ -2753,7 +2753,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
     } else {
       // Special case handling for forced key frames
       if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) {
-        int last_q = q;
+        int last_q = *q;
         int kf_err = vp9_calc_ss_err(cpi->Source, get_frame_new_buffer(cm));
 
         int high_err_target = cpi->ambient_err;
@@ -2769,32 +2769,32 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
             (kf_err > low_err_target &&
              cpi->rc.projected_frame_size <= frame_under_shoot_limit)) {
           // Lower q_high
-          q_high = q > q_low ? q - 1 : q_low;
+          q_high = *q > q_low ? *q - 1 : q_low;
 
           // Adjust Q
-          q = (q * high_err_target) / kf_err;
-          q = MIN(q, (q_high + q_low) >> 1);
+          *q = ((*q) * high_err_target) / kf_err;
+          *q = MIN((*q), (q_high + q_low) >> 1);
         } else if (kf_err < low_err_target &&
                    cpi->rc.projected_frame_size >= frame_under_shoot_limit) {
           // The key frame is much better than the previous frame
           // Raise q_low
-          q_low = q < q_high ? q + 1 : q_high;
+          q_low = *q < q_high ? *q + 1 : q_high;
 
           // Adjust Q
-          q = (q * low_err_target) / kf_err;
-          q = MIN(q, (q_high + q_low + 1) >> 1);
+          *q = ((*q) * low_err_target) / kf_err;
+          *q = MIN((*q), (q_high + q_low + 1) >> 1);
         }
 
         // Clamp Q to upper and lower limits:
-        q = clamp(q, q_low, q_high);
+        *q = clamp(*q, q_low, q_high);
 
-        loop = q != last_q;
+        loop = *q != last_q;
       } else if (recode_loop_test(
           cpi, frame_over_shoot_limit, frame_under_shoot_limit,
-          q, top_index, bottom_index)) {
+          *q, top_index, bottom_index)) {
         // Is the projected frame size out of range and are we allowed
         // to attempt to recode.
-        int last_q = q;
+        int last_q = *q;
         int retries = 0;
 
         // Frame size out of permitted range:
@@ -2803,22 +2803,22 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
         // Frame is too large
         if (cpi->rc.projected_frame_size > cpi->rc.this_frame_target) {
           // Raise Qlow as to at least the current value
-          q_low = q < q_high ? q + 1 : q_high;
+          q_low = *q < q_high ? *q + 1 : q_high;
 
           if (undershoot_seen || loop_count > 1) {
             // Update rate_correction_factor unless
-            vp9_update_rate_correction_factors(cpi, 1);
+            vp9_rc_update_rate_correction_factors(cpi, 1);
 
-            q = (q_high + q_low + 1) / 2;
+            *q = (q_high + q_low + 1) / 2;
           } else {
             // Update rate_correction_factor unless
-            vp9_update_rate_correction_factors(cpi, 0);
+            vp9_rc_update_rate_correction_factors(cpi, 0);
 
-            q = vp9_regulate_q(cpi, cpi->rc.this_frame_target);
+            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
 
-            while (q < q_low && retries < 10) {
-              vp9_update_rate_correction_factors(cpi, 0);
-              q = vp9_regulate_q(cpi, cpi->rc.this_frame_target);
+            while (*q < q_low && retries < 10) {
+              vp9_rc_update_rate_correction_factors(cpi, 0);
+              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
               retries++;
             }
           }
@@ -2826,32 +2826,33 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
           overshoot_seen = 1;
         } else {
           // Frame is too small
-          q_high = q > q_low ? q - 1 : q_low;
+          q_high = *q > q_low ? *q - 1 : q_low;
 
           if (overshoot_seen || loop_count > 1) {
             // Update rate_correction_factor unless
             // cpi->rc.active_worst_quality has changed.
-            vp9_update_rate_correction_factors(cpi, 1);
+            vp9_rc_update_rate_correction_factors(cpi, 1);
 
-            q = (q_high + q_low) / 2;
+            *q = (q_high + q_low) / 2;
           } else {
             // Update rate_correction_factor unless
             // cpi->rc.active_worst_quality has changed.
-            vp9_update_rate_correction_factors(cpi, 0);
+            vp9_rc_update_rate_correction_factors(cpi, 0);
 
-            q = vp9_regulate_q(cpi, cpi->rc.this_frame_target);
+            *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
 
             // Special case reset for qlow for constrained quality.
             // This should only trigger where there is very substantial
             // undershoot on a frame and the auto cq level is above
             // the user passsed in value.
-            if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && q < q_low) {
-              q_low = q;
+            if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY &&
+                *q < q_low) {
+              q_low = *q;
             }
 
-            while (q > q_high && retries < 10) {
-              vp9_update_rate_correction_factors(cpi, 0);
-              q = vp9_regulate_q(cpi, cpi->rc.this_frame_target);
+            while (*q > q_high && retries < 10) {
+              vp9_rc_update_rate_correction_factors(cpi, 0);
+              *q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
               retries++;
             }
           }
@@ -2860,9 +2861,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
         }
 
         // Clamp Q to upper and lower limits:
-        q = clamp(q, q_low, q_high);
+        *q = clamp(*q, q_low, q_high);
 
-        loop = q != last_q;
+        loop = *q != last_q;
       } else {
         loop = 0;
       }
@@ -3003,15 +3004,16 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
     configure_static_seg_features(cpi);
   }
 
-  // Decide how big to make the frame.
-  vp9_pick_frame_size(cpi);
-
   vp9_clear_system_state();
 
-  q = vp9_pick_q_and_adjust_q_bounds(cpi, &bottom_index, &top_index);
+  // Decide how big to make the frame.
+  vp9_rc_pick_frame_size_and_bounds(cpi,
+                                    &frame_under_shoot_limit,
+                                    &frame_over_shoot_limit);
 
-  vp9_compute_frame_size_bounds(cpi, &frame_under_shoot_limit,
-                                &frame_over_shoot_limit);
+  q = vp9_rc_pick_q_and_adjust_q_bounds(cpi,
+                                        &bottom_index,
+                                        &top_index);
 
 #if CONFIG_MULTIPLE_ARF
   // Force the quantizer determined by the coding order pattern.
@@ -3075,7 +3077,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   encode_with_recode_loop(cpi,
                           size,
                           dest,
-                          q,
+                          &q,
                           bottom_index,
                           top_index,
                           frame_over_shoot_limit,
@@ -3157,106 +3159,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
    * needed in motion search besides loopfilter */
   cm->last_frame_type = cm->frame_type;
 
-  // Update rate control heuristics
-  cpi->rc.projected_frame_size = (*size) << 3;
-
-  // Post encode loop adjustment of Q prediction.
-  vp9_update_rate_correction_factors(
-      cpi, (cpi->sf.recode_loop ||
-            cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
-
-
-  cpi->rc.last_q[cm->frame_type] = cm->base_qindex;
-
-  // Keep record of last boosted (KF/KF/ARF) Q value.
-  // If the current frame is coded at a lower Q then we also update it.
-  // If all mbs in this group are skipped only update if the Q value is
-  // better than that already stored.
-  // This is used to help set quality in forced key frames to reduce popping
-  if ((cm->base_qindex < cpi->rc.last_boosted_qindex) ||
-      ((cpi->static_mb_pct < 100) &&
-       ((cm->frame_type == KEY_FRAME) ||
-        cpi->refresh_alt_ref_frame ||
-        (cpi->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) {
-    cpi->rc.last_boosted_qindex = cm->base_qindex;
-  }
-
-  if (cm->frame_type == KEY_FRAME) {
-    vp9_adjust_key_frame_context(cpi);
-  }
-
-  // Keep a record of ambient average Q.
-  if (cm->frame_type != KEY_FRAME)
-    cpi->rc.avg_frame_qindex = (2 + 3 * cpi->rc.avg_frame_qindex +
-                            cm->base_qindex) >> 2;
-
-  // Keep a record from which we can calculate the average Q excluding GF
-  // updates and key frames.
-  if (cm->frame_type != KEY_FRAME &&
-      !cpi->refresh_golden_frame &&
-      !cpi->refresh_alt_ref_frame) {
-    cpi->rc.ni_frames++;
-    cpi->rc.tot_q += vp9_convert_qindex_to_q(q);
-    cpi->rc.avg_q = cpi->rc.tot_q / (double)cpi->rc.ni_frames;
-
-    // Calculate the average Q for normal inter frames (not key or GFU frames).
-    cpi->rc.ni_tot_qi += q;
-    cpi->rc.ni_av_qi = cpi->rc.ni_tot_qi / cpi->rc.ni_frames;
-  }
-
-  // Update the buffer level variable.
-  // Non-viewable frames are a special case and are treated as pure overhead.
-  if (!cm->show_frame)
-    cpi->rc.bits_off_target -= cpi->rc.projected_frame_size;
-  else
-    cpi->rc.bits_off_target += cpi->rc.av_per_frame_bandwidth -
-                               cpi->rc.projected_frame_size;
-
-  // Clip the buffer level at the maximum buffer size
-  if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size)
-    cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size;
-
-  // Rolling monitors of whether we are over or underspending used to help
-  // regulate min and Max Q in two pass.
-  if (cm->frame_type != KEY_FRAME) {
-    cpi->rc.rolling_target_bits =
-        ((cpi->rc.rolling_target_bits * 3) +
-         cpi->rc.this_frame_target + 2) / 4;
-    cpi->rc.rolling_actual_bits =
-        ((cpi->rc.rolling_actual_bits * 3) +
-         cpi->rc.projected_frame_size + 2) / 4;
-    cpi->rc.long_rolling_target_bits =
-        ((cpi->rc.long_rolling_target_bits * 31) +
-         cpi->rc.this_frame_target + 16) / 32;
-    cpi->rc.long_rolling_actual_bits =
-        ((cpi->rc.long_rolling_actual_bits * 31) +
-         cpi->rc.projected_frame_size + 16) / 32;
-  }
-
-  // Actual bits spent
-  cpi->rc.total_actual_bits += cpi->rc.projected_frame_size;
-
-  // Debug stats
-  cpi->rc.total_target_vs_actual += (cpi->rc.this_frame_target -
-                                     cpi->rc.projected_frame_size);
-
-  cpi->rc.buffer_level = cpi->rc.bits_off_target;
-
-#ifndef DISABLE_RC_LONG_TERM_MEM
-  // Update bits left to the kf and gf groups to account for overshoot or
-  // undershoot on these frames
-  if (cm->frame_type == KEY_FRAME) {
-    cpi->twopass.kf_group_bits += cpi->rc.this_frame_target -
-                                  cpi->rc.projected_frame_size;
-
-    cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0);
-  } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) {
-    cpi->twopass.gf_group_bits += cpi->rc.this_frame_target -
-                                  cpi->rc.projected_frame_size;
-
-    cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0);
-  }
-#endif
+  vp9_rc_postencode_update(cpi, *size, q);
 
 #if 0
   output_frame_level_debug_stats(cpi);
@@ -3386,6 +3289,10 @@ static void Pass2Encode(VP9_COMP *cpi, unsigned long *size,
 
   encode_frame_to_data_rate(cpi, size, dest, frame_flags);
   // vp9_print_modes_and_motion_vectors(&cpi->common, "encode.stt");
+
+  vp9_twopass_postencode_update(cpi, *size);
+
+  /*
 #ifdef DISABLE_RC_LONG_TERM_MEM
   cpi->twopass.bits_left -=  cpi->rc.this_frame_target;
 #else
@@ -3404,6 +3311,7 @@ static void Pass2Encode(VP9_COMP *cpi, unsigned long *size,
     cpi->twopass.bits_left += (int64_t)(two_pass_min_rate
                               / cpi->oxcf.framerate);
   }
+  */
 }
 
 static void check_initial_width(VP9_COMP *cpi, YV12_BUFFER_CONFIG *sd) {
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 03002ef4b..b8602e094 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -30,7 +30,6 @@
 #include "vp9/encoder/vp9_lookahead.h"
 
 #define DISABLE_RC_LONG_TERM_MEM 0
-
 // #define MODE_TEST_HIT_STATS
 
 // #define SPEEDSTATS 1
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 42372e56c..bf1fc4f31 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -26,6 +26,8 @@
 #include "vp9/common/vp9_quant_common.h"
 #include "vp9/common/vp9_seg_common.h"
 
+#define LIMIT_QRANGE_FOR_ALTREF_AND_KEY 1
+
 #define MIN_BPB_FACTOR 0.005
 #define MAX_BPB_FACTOR 50
 
@@ -67,7 +69,7 @@ static int calculate_minq_index(double maxq,
   return QINDEX_RANGE - 1;
 }
 
-void vp9_init_minq_luts(void) {
+void vp9_rc_init_minq_luts(void) {
   int i;
 
   for (i = 0; i < QINDEX_RANGE; i++) {
@@ -121,22 +123,8 @@ double vp9_convert_qindex_to_q(int qindex) {
   return vp9_ac_quant(qindex, 0) / 4.0;
 }
 
-int vp9_gfboost_qadjust(int qindex) {
-  const double q = vp9_convert_qindex_to_q(qindex);
-  return (int)((0.00000828 * q * q * q) +
-               (-0.0055 * q * q) +
-               (1.32 * q) + 79.3);
-}
-
-static int kfboost_qadjust(int qindex) {
-  const double q = vp9_convert_qindex_to_q(qindex);
-  return (int)((0.00000973 * q * q * q) +
-               (-0.00613 * q * q) +
-               (1.316 * q) + 121.2);
-}
-
-int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
-                    double correction_factor) {
+int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+                       double correction_factor) {
   const double q = vp9_convert_qindex_to_q(qindex);
   int enumerator = frame_type == KEY_FRAME ? 3300000 : 2250000;
 
@@ -213,7 +201,7 @@ void vp9_setup_inter_frame(VP9_COMP *cpi) {
 
 static int estimate_bits_at_q(int frame_kind, int q, int mbs,
                               double correction_factor) {
-  const int bpm = (int)(vp9_bits_per_mb(frame_kind, q, correction_factor));
+  const int bpm = (int)(vp9_rc_bits_per_mb(frame_kind, q, correction_factor));
 
   // Attempt to retain reasonable accuracy without overflow. The cutoff is
   // chosen such that the maximum product of Bpm and MBs fits 31 bits. The
@@ -240,15 +228,9 @@ static void calc_iframe_target_size(VP9_COMP *cpi) {
     if (target > max_rate)
       target = max_rate;
   }
-
   cpi->rc.this_frame_target = target;
-
-  // Target rate per SB64 (including partial SB64s.
-  cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) /
-                             (cpi->common.width * cpi->common.height);
 }
 
-
 //  Do the best we can to define the parameters for the next GF based
 //  on what information we have available.
 //
@@ -273,11 +255,6 @@ static void calc_pframe_target_size(VP9_COMP *cpi) {
     cpi->rc.this_frame_target = cpi->rc.per_frame_bandwidth;
   }
 
-  // Target rate per SB64 (including partial SB64s.
-  cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) /
-                             (cpi->common.width * cpi->common.height);
-
-
   // Check that the total sum of adjustments is not above the maximum allowed.
   // That is, having allowed for the KF and GF penalties, we have not pushed
   // the current inter-frame target too low. If the adjustment we apply here is
@@ -309,7 +286,7 @@ static void calc_pframe_target_size(VP9_COMP *cpi) {
 }
 
 
-void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
+void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
   const int q = cpi->common.base_qindex;
   int correction_factor = 100;
   double rate_correction_factor;
@@ -390,7 +367,7 @@ void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var) {
 }
 
 
-int vp9_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame) {
+int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame) {
   int q = cpi->rc.active_worst_quality;
 
   int i;
@@ -422,8 +399,8 @@ int vp9_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame) {
   i = cpi->rc.active_best_quality;
 
   do {
-    bits_per_mb_at_this_q = (int)vp9_bits_per_mb(cpi->common.frame_type, i,
-                                                 correction_factor);
+    bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(cpi->common.frame_type, i,
+                                                    correction_factor);
 
     if (bits_per_mb_at_this_q <= target_bits_per_mb) {
       if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error)
@@ -461,8 +438,9 @@ static int get_active_quality(int q,
   return active_best_quality;
 }
 
-int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
-                                   int * bottom_index, int * top_index) {
+int vp9_rc_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
+                                      int *bottom_index,
+                                      int *top_index) {
   // Set an active best quality and if necessary active worst quality
   int q = cpi->rc.active_worst_quality;
   VP9_COMMON *const cm = &cpi->common;
@@ -481,7 +459,12 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
                                         (last_boosted_q * 0.75));
 
       cpi->rc.active_best_quality = MAX(qindex + delta_qindex,
-                                     cpi->rc.best_quality);
+                                        cpi->rc.best_quality);
+    } else if (cpi->pass == 0 && cpi->common.current_video_frame == 0) {
+      // If this is the first (key) frame in 1-pass, active best/worst is
+      // the user best/worst-allowed, and leave the top_index to active_worst.
+      cpi->rc.active_best_quality = cpi->oxcf.best_allowed_q;
+      cpi->rc.active_worst_quality = cpi->oxcf.worst_allowed_q;
     } else {
       int high = 5000;
       int low = 400;
@@ -490,9 +473,9 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
 
       // Baseline value derived from cpi->active_worst_quality and kf boost
       cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.kf_boost,
-                                                    low, high,
-                                                    kf_low_motion_minq,
-                                                    kf_high_motion_minq);
+                                                       low, high,
+                                                       kf_low_motion_minq,
+                                                       kf_high_motion_minq);
 
       // Allow somewhat lower kf minq with small image formats.
       if ((cm->width * cm->height) <= (352 * 288)) {
@@ -533,14 +516,14 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
         q = cpi->cq_target_quality;
       if (cpi->frames_since_key > 1) {
         cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost,
-                                                      low, high,
-                                                      afq_low_motion_minq,
-                                                      afq_high_motion_minq);
+                                                         low, high,
+                                                         afq_low_motion_minq,
+                                                         afq_high_motion_minq);
       } else {
         cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost,
-                                                      low, high,
-                                                      gf_low_motion_minq,
-                                                      gf_high_motion_minq);
+                                                         low, high,
+                                                         gf_low_motion_minq,
+                                                         gf_high_motion_minq);
       }
       // Constrained quality use slightly lower active best.
       cpi->rc.active_best_quality = cpi->rc.active_best_quality * 15 / 16;
@@ -550,22 +533,19 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
         cpi->rc.active_best_quality = cpi->cq_target_quality;
       } else {
         if (cpi->frames_since_key > 1) {
-          cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost,
-                                                        low, high,
-                                                        afq_low_motion_minq,
-                                                        afq_high_motion_minq);
+          cpi->rc.active_best_quality = get_active_quality(
+              q, cpi->rc.gfu_boost, low, high,
+              afq_low_motion_minq, afq_high_motion_minq);
         } else {
-          cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost,
-                                                        low, high,
-                                                        gf_low_motion_minq,
-                                                        gf_high_motion_minq);
+          cpi->rc.active_best_quality = get_active_quality(
+              q, cpi->rc.gfu_boost, low, high,
+              gf_low_motion_minq, gf_high_motion_minq);
         }
       }
     } else {
-        cpi->rc.active_best_quality = get_active_quality(q, cpi->rc.gfu_boost,
-                                                         low, high,
-                                                         gf_low_motion_minq,
-                                                         gf_high_motion_minq);
+        cpi->rc.active_best_quality = get_active_quality(
+            q, cpi->rc.gfu_boost, low, high,
+            gf_low_motion_minq, gf_high_motion_minq);
     }
   } else {
     if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
@@ -605,25 +585,23 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
   if (cpi->rc.active_worst_quality < cpi->rc.active_best_quality)
     cpi->rc.active_worst_quality = cpi->rc.active_best_quality;
 
+  *top_index = cpi->rc.active_worst_quality;
+  *bottom_index = cpi->rc.active_best_quality;
+
+#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
   // Limit Q range for the adaptive loop.
   if (cm->frame_type == KEY_FRAME && !cpi->this_key_frame_forced) {
-    *top_index =
-      (cpi->rc.active_worst_quality + cpi->rc.active_best_quality * 3) / 4;
-    // If this is the first (key) frame in 1-pass, active best is the user
-    // best-allowed, and leave the top_index to active_worst.
-    if (cpi->pass == 0 && cpi->common.current_video_frame == 0) {
-      cpi->rc.active_best_quality = cpi->oxcf.best_allowed_q;
-      *top_index = cpi->oxcf.worst_allowed_q;
+    if (!(cpi->pass == 0 && cpi->common.current_video_frame == 0)) {
+      *top_index =
+          (cpi->rc.active_worst_quality + cpi->rc.active_best_quality * 3) / 4;
     }
   } else if (!cpi->is_src_frame_alt_ref &&
              (cpi->oxcf.end_usage != USAGE_STREAM_FROM_SERVER) &&
              (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
     *top_index =
       (cpi->rc.active_worst_quality + cpi->rc.active_best_quality) / 2;
-  } else {
-    *top_index = cpi->rc.active_worst_quality;
   }
-  *bottom_index = cpi->rc.active_best_quality;
+#endif
 
   if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
     q = cpi->rc.active_best_quality;
@@ -636,14 +614,13 @@ int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
       // 1-pass: for now, use per-frame-bw for target size of frame, scaled
       // by |x| for key frame.
       int scale = (cm->frame_type == KEY_FRAME) ? 5 : 1;
-      q = vp9_regulate_q(cpi, scale * cpi->rc.av_per_frame_bandwidth);
+      q = vp9_rc_regulate_q(cpi, scale * cpi->rc.av_per_frame_bandwidth);
     } else {
-      q = vp9_regulate_q(cpi, cpi->rc.this_frame_target);
+      q = vp9_rc_regulate_q(cpi, cpi->rc.this_frame_target);
     }
     if (q > *top_index)
       q = *top_index;
   }
-
   return q;
 }
 
@@ -695,7 +672,7 @@ static int estimate_keyframe_frequency(VP9_COMP *cpi) {
 }
 
 
-void vp9_adjust_key_frame_context(VP9_COMP *cpi) {
+static void adjust_key_frame_context(VP9_COMP *cpi) {
   // Clear down mmx registers to allow floating point in what follows
   vp9_clear_system_state();
 
@@ -704,28 +681,30 @@ void vp9_adjust_key_frame_context(VP9_COMP *cpi) {
 }
 
 
-void vp9_compute_frame_size_bounds(VP9_COMP *cpi, int *frame_under_shoot_limit,
-                                   int *frame_over_shoot_limit) {
+static void compute_frame_size_bounds(const VP9_COMP *cpi,
+                                      int this_frame_target,
+                                      int *frame_under_shoot_limit,
+                                      int *frame_over_shoot_limit) {
   // Set-up bounds on acceptable frame size:
   if (cpi->oxcf.end_usage == USAGE_CONSTANT_QUALITY) {
     *frame_under_shoot_limit = 0;
     *frame_over_shoot_limit  = INT_MAX;
   } else {
     if (cpi->common.frame_type == KEY_FRAME) {
-      *frame_over_shoot_limit  = cpi->rc.this_frame_target * 9 / 8;
-      *frame_under_shoot_limit = cpi->rc.this_frame_target * 7 / 8;
+      *frame_over_shoot_limit  = this_frame_target * 9 / 8;
+      *frame_under_shoot_limit = this_frame_target * 7 / 8;
     } else {
       if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) {
-        *frame_over_shoot_limit  = cpi->rc.this_frame_target * 9 / 8;
-        *frame_under_shoot_limit = cpi->rc.this_frame_target * 7 / 8;
+        *frame_over_shoot_limit  = this_frame_target * 9 / 8;
+        *frame_under_shoot_limit = this_frame_target * 7 / 8;
       } else {
         // Stron overshoot limit for constrained quality
         if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
-          *frame_over_shoot_limit  = cpi->rc.this_frame_target * 11 / 8;
-          *frame_under_shoot_limit = cpi->rc.this_frame_target * 2 / 8;
+          *frame_over_shoot_limit  = this_frame_target * 11 / 8;
+          *frame_under_shoot_limit = this_frame_target * 2 / 8;
         } else {
-          *frame_over_shoot_limit  = cpi->rc.this_frame_target * 11 / 8;
-          *frame_under_shoot_limit = cpi->rc.this_frame_target * 5 / 8;
+          *frame_over_shoot_limit  = this_frame_target * 11 / 8;
+          *frame_under_shoot_limit = this_frame_target * 5 / 8;
         }
       }
     }
@@ -740,9 +719,10 @@ void vp9_compute_frame_size_bounds(VP9_COMP *cpi, int *frame_under_shoot_limit,
   }
 }
 
-
 // return of 0 means drop frame
-int vp9_pick_frame_size(VP9_COMP *cpi) {
+int vp9_rc_pick_frame_size_and_bounds(VP9_COMP *cpi,
+                                      int *frame_under_shoot_limit,
+                                      int *frame_over_shoot_limit) {
   VP9_COMMON *cm = &cpi->common;
 
   if (cm->frame_type == KEY_FRAME)
@@ -750,5 +730,112 @@ int vp9_pick_frame_size(VP9_COMP *cpi) {
   else
     calc_pframe_target_size(cpi);
 
+  // Target rate per SB64 (including partial SB64s.
+  cpi->rc.sb64_target_rate = ((int64_t)cpi->rc.this_frame_target * 64 * 64) /
+                             (cpi->common.width * cpi->common.height);
+  compute_frame_size_bounds(cpi, cpi->rc.this_frame_target,
+                            frame_under_shoot_limit, frame_over_shoot_limit);
+
   return 1;
 }
+
+void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used, int q) {
+  VP9_COMMON *const cm = &cpi->common;
+  // Update rate control heuristics
+  cpi->rc.projected_frame_size = (bytes_used << 3);
+
+  // Post encode loop adjustment of Q prediction.
+  vp9_rc_update_rate_correction_factors(
+      cpi, (cpi->sf.recode_loop ||
+            cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) ? 2 : 0);
+
+  cpi->rc.last_q[cm->frame_type] = cm->base_qindex;
+
+  // Keep record of last boosted (KF/KF/ARF) Q value.
+  // If the current frame is coded at a lower Q then we also update it.
+  // If all mbs in this group are skipped only update if the Q value is
+  // better than that already stored.
+  // This is used to help set quality in forced key frames to reduce popping
+  if ((cm->base_qindex < cpi->rc.last_boosted_qindex) ||
+      ((cpi->static_mb_pct < 100) &&
+       ((cm->frame_type == KEY_FRAME) || cpi->refresh_alt_ref_frame ||
+        (cpi->refresh_golden_frame && !cpi->is_src_frame_alt_ref)))) {
+    cpi->rc.last_boosted_qindex = cm->base_qindex;
+  }
+
+  if (cm->frame_type == KEY_FRAME) {
+    adjust_key_frame_context(cpi);
+  }
+
+  // Keep a record of ambient average Q.
+  if (cm->frame_type != KEY_FRAME)
+    cpi->rc.avg_frame_qindex = (2 + 3 * cpi->rc.avg_frame_qindex +
+                            cm->base_qindex) >> 2;
+
+  // Keep a record from which we can calculate the average Q excluding GF
+  // updates and key frames.
+  if (cm->frame_type != KEY_FRAME &&
+      !cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame) {
+    cpi->rc.ni_frames++;
+    cpi->rc.tot_q += vp9_convert_qindex_to_q(q);
+    cpi->rc.avg_q = cpi->rc.tot_q / (double)cpi->rc.ni_frames;
+
+    // Calculate the average Q for normal inter frames (not key or GFU frames).
+    cpi->rc.ni_tot_qi += q;
+    cpi->rc.ni_av_qi = cpi->rc.ni_tot_qi / cpi->rc.ni_frames;
+  }
+
+  // Update the buffer level variable.
+  // Non-viewable frames are a special case and are treated as pure overhead.
+  if (!cm->show_frame)
+    cpi->rc.bits_off_target -= cpi->rc.projected_frame_size;
+  else
+    cpi->rc.bits_off_target += cpi->rc.av_per_frame_bandwidth -
+                               cpi->rc.projected_frame_size;
+
+  // Clip the buffer level at the maximum buffer size
+  if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size)
+    cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size;
+
+  // Rolling monitors of whether we are over or underspending used to help
+  // regulate min and Max Q in two pass.
+  if (cm->frame_type != KEY_FRAME) {
+    cpi->rc.rolling_target_bits =
+        ((cpi->rc.rolling_target_bits * 3) +
+         cpi->rc.this_frame_target + 2) / 4;
+    cpi->rc.rolling_actual_bits =
+        ((cpi->rc.rolling_actual_bits * 3) +
+         cpi->rc.projected_frame_size + 2) / 4;
+    cpi->rc.long_rolling_target_bits =
+        ((cpi->rc.long_rolling_target_bits * 31) +
+         cpi->rc.this_frame_target + 16) / 32;
+    cpi->rc.long_rolling_actual_bits =
+        ((cpi->rc.long_rolling_actual_bits * 31) +
+         cpi->rc.projected_frame_size + 16) / 32;
+  }
+
+  // Actual bits spent
+  cpi->rc.total_actual_bits += cpi->rc.projected_frame_size;
+
+  // Debug stats
+  cpi->rc.total_target_vs_actual += (cpi->rc.this_frame_target -
+                                     cpi->rc.projected_frame_size);
+
+  cpi->rc.buffer_level = cpi->rc.bits_off_target;
+
+#ifndef DISABLE_RC_LONG_TERM_MEM
+  // Update bits left to the kf and gf groups to account for overshoot or
+  // undershoot on these frames
+  if (cm->frame_type == KEY_FRAME) {
+    cpi->twopass.kf_group_bits += cpi->rc.this_frame_target -
+                                  cpi->rc.projected_frame_size;
+
+    cpi->twopass.kf_group_bits = MAX(cpi->twopass.kf_group_bits, 0);
+  } else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) {
+    cpi->twopass.gf_group_bits += cpi->rc.this_frame_target -
+                                  cpi->rc.projected_frame_size;
+
+    cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0);
+  }
+#endif
+}
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 13357447a..f01d18672 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -20,24 +20,41 @@ void vp9_save_coding_context(VP9_COMP *cpi);
 void vp9_restore_coding_context(VP9_COMP *cpi);
 
 void vp9_setup_key_frame(VP9_COMP *cpi);
-void vp9_update_rate_correction_factors(VP9_COMP *cpi, int damp_var);
-int vp9_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame);
-void vp9_adjust_key_frame_context(VP9_COMP *cpi);
-void vp9_compute_frame_size_bounds(VP9_COMP *cpi,
-                                   int *frame_under_shoot_limit,
-                                   int *frame_over_shoot_limit);
+void vp9_setup_inter_frame(VP9_COMP *cpi);
 
-void vp9_init_minq_luts(void);
+double vp9_convert_qindex_to_q(int qindex);
 
-// return of 0 means drop frame
-int vp9_pick_frame_size(VP9_COMP *cpi);
+// Updates rate correction factors
+void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi, int damp_var);
 
-double vp9_convert_qindex_to_q(int qindex);
-int vp9_gfboost_qadjust(int qindex);
-int vp9_bits_per_mb(FRAME_TYPE frame_type, int qindex,
-                    double correction_factor);
-void vp9_setup_inter_frame(VP9_COMP *cpi);
-int vp9_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
-                                   int * bottom_index, int * top_index);
+// initialize luts for minq
+void vp9_rc_init_minq_luts(void);
+
+// return of 0 means drop frame
+// Changes rc.this_frame_target and rc.sb64_rate_target
+int vp9_rc_pick_frame_size_and_bounds(VP9_COMP *cpi,
+                                      int *frame_under_shoot_limit,
+                                      int *frame_over_shoot_limit);
+// Picks q and q bounds given the target for bits
+int vp9_rc_pick_q_and_adjust_q_bounds(VP9_COMP *cpi,
+                                      int * bottom_index,
+                                      int * top_index);
+
+// Estimates q to achieve a target bits per frame
+int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame);
+
+// Post encode update of the rate control parameters based
+// on bytes used and q used for the frame
+void vp9_rc_postencode_update(VP9_COMP *cpi,
+                              uint64_t bytes_used,
+                              int q_used);
+
+// estimates bits per mb for a given qindex and correction factor
+int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex,
+                       double correction_factor);
+
+// Post encode update of the rate control parameters for 2-pass
+void vp9_twopass_postencode_update(VP9_COMP *cpi,
+                                   uint64_t bytes_used);
 
 #endif  // VP9_ENCODER_VP9_RATECTRL_H_
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index ee9c18b21..84b71224e 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -762,32 +762,6 @@ static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
   cpi->tx_stepdown_count[0]++;
 }
 
-static TX_SIZE select_tx_size(TX_MODE tx_mode, TX_SIZE max_tx_size,
-                              int64_t rd[][2]) {
-  if (max_tx_size == TX_32X32 &&
-      (tx_mode == ALLOW_32X32 ||
-       (tx_mode == TX_MODE_SELECT &&
-       rd[TX_32X32][1] < rd[TX_16X16][1] &&
-       rd[TX_32X32][1] < rd[TX_8X8][1] &&
-       rd[TX_32X32][1] < rd[TX_4X4][1]))) {
-    return TX_32X32;
-  } else if (max_tx_size >= TX_16X16 &&
-             (tx_mode == ALLOW_16X16 ||
-              tx_mode == ALLOW_32X32 ||
-              (tx_mode == TX_MODE_SELECT &&
-               rd[TX_16X16][1] < rd[TX_8X8][1] &&
-               rd[TX_16X16][1] < rd[TX_4X4][1]))) {
-    return TX_16X16;
-  } else if (tx_mode == ALLOW_8X8 ||
-             tx_mode == ALLOW_16X16 ||
-             tx_mode == ALLOW_32X32 ||
-             (tx_mode == TX_MODE_SELECT && rd[TX_8X8][1] < rd[TX_4X4][1])) {
-    return TX_8X8;
-  } else {
-    return TX_4X4;
-  }
-}
-
 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                      int (*r)[2], int *rate,
                                      int64_t *d, int64_t *distortion,
@@ -802,39 +776,42 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t rd[TX_SIZES][2];
   int n, m;
   int s0, s1;
+  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
+  int64_t best_rd = INT64_MAX;
+  TX_SIZE best_tx = TX_4X4;
 
   const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
-
-  for (n = TX_4X4; n <= max_tx_size; n++) {
-    r[n][1] = r[n][0];
-    if (r[n][0] == INT_MAX)
-      continue;
-    for (m = 0; m <= n - (n == max_tx_size); m++) {
-      if (m == n)
-        r[n][1] += vp9_cost_zero(tx_probs[m]);
-      else
-        r[n][1] += vp9_cost_one(tx_probs[m]);
-    }
-  }
-
   assert(skip_prob > 0);
   s0 = vp9_cost_bit(skip_prob, 0);
   s1 = vp9_cost_bit(skip_prob, 1);
 
   for (n = TX_4X4; n <= max_tx_size; n++) {
+    r[n][1] = r[n][0];
+    if (r[n][0] < INT_MAX) {
+      for (m = 0; m <= n - (n == max_tx_size); m++) {
+        if (m == n)
+          r[n][1] += vp9_cost_zero(tx_probs[m]);
+        else
+          r[n][1] += vp9_cost_one(tx_probs[m]);
+      }
+    }
     if (d[n] == INT64_MAX) {
       rd[n][0] = rd[n][1] = INT64_MAX;
-      continue;
-    }
-    if (s[n]) {
+    } else if (s[n]) {
       rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
     } else {
       rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
       rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
     }
+
+    if (rd[n][1] < best_rd) {
+      best_tx = n;
+      best_rd = rd[n][1];
+    }
   }
+  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
+                      best_tx : MIN(max_tx_size, max_mode_tx_size);
 
-  mbmi->tx_size = select_tx_size(cm->tx_mode, max_tx_size, rd);
 
   *distortion = d[mbmi->tx_size];
   *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
@@ -844,29 +821,18 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
   tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
   tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
   tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
-  if (max_tx_size == TX_32X32 &&
-      rd[TX_32X32][1] < rd[TX_16X16][1] && rd[TX_32X32][1] < rd[TX_8X8][1] &&
-      rd[TX_32X32][1] < rd[TX_4X4][1])
-    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
-  else if (max_tx_size >= TX_16X16 &&
-           rd[TX_16X16][1] < rd[TX_8X8][1] && rd[TX_16X16][1] < rd[TX_4X4][1])
-    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
-  else
-    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1] < rd[TX_8X8][1] ?
-                                 rd[TX_4X4][1] : rd[TX_8X8][1];
 
-  if (max_tx_size == TX_32X32 &&
-      rd[TX_32X32][1] < rd[TX_16X16][1] &&
-      rd[TX_32X32][1] < rd[TX_8X8][1] &&
-      rd[TX_32X32][1] < rd[TX_4X4][1]) {
+  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
+    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
     cpi->tx_stepdown_count[0]++;
-  } else if (max_tx_size >= TX_16X16 &&
-             rd[TX_16X16][1] < rd[TX_8X8][1] &&
-             rd[TX_16X16][1] < rd[TX_4X4][1]) {
+  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
+    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
     cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
   } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
+    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
     cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
   } else {
+    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
     cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
   }
 }
@@ -886,14 +852,17 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
   int n, m;
   int s0, s1;
   double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
-  // double scale_r[TX_SIZES] = {2.82, 2.00, 1.41, 1.00};
+  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
+  int64_t best_rd = INT64_MAX;
+  TX_SIZE best_tx = TX_4X4;
 
   const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
-
-  // for (n = TX_4X4; n <= max_txfm_size; n++)
-  //   r[n][0] = (r[n][0] * scale_r[n]);
+  assert(skip_prob > 0);
+  s0 = vp9_cost_bit(skip_prob, 0);
+  s1 = vp9_cost_bit(skip_prob, 1);
 
   for (n = TX_4X4; n <= max_tx_size; n++) {
+    double scale = scale_rd[n];
     r[n][1] = r[n][0];
     for (m = 0; m <= n - (n == max_tx_size); m++) {
       if (m == n)
@@ -901,40 +870,29 @@ static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
       else
         r[n][1] += vp9_cost_one(tx_probs[m]);
     }
-  }
-
-  assert(skip_prob > 0);
-  s0 = vp9_cost_bit(skip_prob, 0);
-  s1 = vp9_cost_bit(skip_prob, 1);
-
-  for (n = TX_4X4; n <= max_tx_size; n++) {
     if (s[n]) {
-      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
+      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
     } else {
-      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
-      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
+      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
+      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
+    }
+    if (rd[n][1] < best_rd) {
+      best_rd = rd[n][1];
+      best_tx = n;
     }
-  }
-  for (n = TX_4X4; n <= max_tx_size; n++) {
-    rd[n][0] = (int64_t)(scale_rd[n] * rd[n][0]);
-    rd[n][1] = (int64_t)(scale_rd[n] * rd[n][1]);
   }
 
-  mbmi->tx_size = select_tx_size(cm->tx_mode, max_tx_size, rd);
+  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
+                      best_tx : MIN(max_tx_size, max_mode_tx_size);
 
   // Actually encode using the chosen mode if a model was used, but do not
   // update the r, d costs
   txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
                    &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
 
-  if (max_tx_size == TX_32X32 &&
-      rd[TX_32X32][1] <= rd[TX_16X16][1] &&
-      rd[TX_32X32][1] <= rd[TX_8X8][1] &&
-      rd[TX_32X32][1] <= rd[TX_4X4][1]) {
+  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
     cpi->tx_stepdown_count[0]++;
-  } else if (max_tx_size >= TX_16X16 &&
-             rd[TX_16X16][1] <= rd[TX_8X8][1] &&
-             rd[TX_16X16][1] <= rd[TX_4X4][1]) {
+  } else if (max_tx_size >= TX_16X16 &&  best_tx == TX_16X16) {
     cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
   } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
     cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
@@ -954,6 +912,9 @@ static void super_block_yrd(VP9_COMP *cpi,
   MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
   struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
   const int b_inter_mode = is_inter_block(mbmi);
+  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
+  TX_SIZE tx_size;
+
 
   assert(bs == mbmi->sb_type);
   if (b_inter_mode)
@@ -972,34 +933,16 @@ static void super_block_yrd(VP9_COMP *cpi,
 
   if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
       b_inter_mode) {
-    if (bs >= BLOCK_32X32)
-      model_rd_for_sb_y_tx(cpi, bs, TX_32X32, x, xd,
-                           &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]);
-    if (bs >= BLOCK_16X16)
-      model_rd_for_sb_y_tx(cpi, bs, TX_16X16, x, xd,
-                           &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]);
-
-    model_rd_for_sb_y_tx(cpi, bs, TX_8X8, x, xd,
-                         &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]);
-
-    model_rd_for_sb_y_tx(cpi, bs, TX_4X4, x, xd,
-                         &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4]);
-
+    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
+      model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
+                           &r[tx_size][0], &d[tx_size], &s[tx_size]);
     choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
                                   skip, sse, ref_best_rd, bs);
   } else {
-    if (bs >= BLOCK_32X32)
-      txfm_rd_in_plane(x, rdcost_stack, &r[TX_32X32][0], &d[TX_32X32],
-                       &s[TX_32X32], &sse[TX_32X32],
-                       ref_best_rd, 0, bs, TX_32X32);
-    if (bs >= BLOCK_16X16)
-      txfm_rd_in_plane(x, rdcost_stack, &r[TX_16X16][0], &d[TX_16X16],
-                       &s[TX_16X16], &sse[TX_16X16],
-                       ref_best_rd, 0, bs, TX_16X16);
-    txfm_rd_in_plane(x, rdcost_stack, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8],
-                     &sse[TX_8X8], ref_best_rd, 0, bs, TX_8X8);
-    txfm_rd_in_plane(x, rdcost_stack, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4],
-                     &sse[TX_4X4], ref_best_rd, 0, bs, TX_4X4);
+    for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
+      txfm_rd_in_plane(x, rdcost_stack, &r[tx_size][0], &d[tx_size],
+                       &s[tx_size], &sse[tx_size],
+                       ref_best_rd, 0, bs, tx_size);
     choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
                              skip, txfm_cache, bs);
   }
@@ -2150,7 +2093,7 @@ static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
   int best_index = 0;
   int best_sad = INT_MAX;
   int this_sad = INT_MAX;
-  unsigned int max_mv = 0;
+  int max_mv = 0;
 
   uint8_t *src_y_ptr = x->plane[0].src.buf;
   uint8_t *ref_y_ptr;
@@ -3651,7 +3594,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 
     /* keep record of best compound/single-only prediction */
     if (!disable_skip && ref_frame != INTRA_FRAME) {
-      int single_rd, hybrid_rd, single_rate, hybrid_rate;
+      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
 
       if (cm->comp_pred_mode == REFERENCE_MODE_SELECT) {
         single_rate = rate2 - compmode_cost;
@@ -4402,7 +4345,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
     /* keep record of best compound/single-only prediction */
     if (!disable_skip && ref_frame != INTRA_FRAME) {
-      int single_rd, hybrid_rd, single_rate, hybrid_rate;
+      int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
 
       if (cpi->common.comp_pred_mode == REFERENCE_MODE_SELECT) {
         single_rate = rate2 - compmode_cost;