10 files changed, 169 insertions, 106 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 425db009e..df4223a23 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2377,6 +2377,19 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
 #endif
     // TODO(jingning): Reduce the actual memory use for tpl model build up.
     for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
+#if CONFIG_NON_GREEDY_MV
+      int sqr_bsize;
+      int rf_idx;
+      for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+        for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
+          CHECK_MEM_ERROR(
+              cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
+              vpx_calloc(mi_rows * mi_cols,
+                         sizeof(*cpi->tpl_stats[frame]
+                                     .pyramid_mv_arr[rf_idx][sqr_bsize])));
+        }
+      }
+#endif
       CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
                       vpx_calloc(mi_rows * mi_cols,
                                  sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
@@ -3873,6 +3886,9 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
   set_size_independent_vars(cpi);
   set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
 
+  // search method and step parameter might be changed in speed settings.
+  init_motion_estimation(cpi);
+
   if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
 
   if (cpi->sf.svc_use_lowres_part &&
@@ -5583,39 +5599,12 @@ void init_tpl_stats(VP9_COMP *cpi) {
 }
 
 #if CONFIG_NON_GREEDY_MV
-static void prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
-                                int mi_col, int rf_idx, BLOCK_SIZE bsize,
-                                int_mv *nb_full_mvs) {
-  const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
-  const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
-  int i;
-  for (i = 0; i < NB_MVS_NUM; ++i) {
-    int r = dirs[i][0] * mi_unit;
-    int c = dirs[i][1] * mi_unit;
-    if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
-        mi_col + c < tpl_frame->mi_cols) {
-      const TplDepStats *tpl_ptr =
-          &tpl_frame
-               ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
-      if (tpl_ptr->ready[rf_idx]) {
-        nb_full_mvs[i].as_mv = get_full_mv(&tpl_ptr->mv_arr[rf_idx].as_mv);
-      } else {
-        nb_full_mvs[i].as_int = INVALID_MV;
-      }
-    } else {
-      nb_full_mvs[i].as_int = INVALID_MV;
-    }
-  }
-}
-#endif
-
-#if CONFIG_NON_GREEDY_MV
 uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
                                        int frame_idx, uint8_t *cur_frame_buf,
                                        uint8_t *ref_frame_buf, int stride,
                                        BLOCK_SIZE bsize, int mi_row, int mi_col,
-                                       TplDepStats *tpl_stats, int rf_idx) {
-  MV *mv = &tpl_stats->mv_arr[rf_idx].as_mv;
+                                       MV *mv, int rf_idx, double *mv_dist,
+                                       double *mv_cost) {
 #else   // CONFIG_NON_GREEDY_MV
 uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
                                        int frame_idx, uint8_t *cur_frame_buf,
@@ -5661,12 +5650,11 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
 #if CONFIG_NON_GREEDY_MV
   (void)search_method;
   (void)sadpb;
-  prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx, bsize,
-                      nb_full_mvs);
-  vp9_full_pixel_diamond_new(
-      cpi, x, &best_ref_mv1_full, step_param, lambda, 1, &cpi->fn_ptr[bsize],
-      nb_full_mvs, NB_MVS_NUM, &tpl_stats->mv_arr[rf_idx].as_mv,
-      &tpl_stats->mv_dist[rf_idx], &tpl_stats->mv_cost[rf_idx]);
+  vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx,
+                          bsize, nb_full_mvs);
+  vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1,
+                             &cpi->fn_ptr[bsize], nb_full_mvs, NB_MVS_NUM, mv,
+                             mv_dist, mv_cost);
 #else
   (void)frame_idx;
   (void)mi_row;
@@ -5972,8 +5960,7 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
       vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                                 dst_stride, xd->bd);
       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
-      // TODO(sdeng): Implement SIMD based high bit-depth satd.
-      intra_cost = vpx_satd_c(coeff, pix_num);
+      intra_cost = vpx_highbd_satd(coeff, pix_num);
     } else {
       vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
                          dst_stride);
@@ -6000,7 +5987,8 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
 
 #if CONFIG_NON_GREEDY_MV
     (void)td;
-    mv.as_int = tpl_stats->mv_arr[rf_idx].as_int;
+    mv.as_int =
+        get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_int;
 #else
     motion_compensated_prediction(
         cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
@@ -6019,7 +6007,7 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
           bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
           xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
       highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
-      inter_cost = vpx_satd_c(coeff, pix_num);
+      inter_cost = vpx_highbd_satd(coeff, pix_num);
     } else {
       vp9_build_inter_predictor(
           ref_frame[rf_idx]->y_buffer + mb_y_offset,
@@ -6102,6 +6090,7 @@ static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx,
   set_mv_limits(cm, x, mi_row, mi_col);
 
   for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+    int_mv *mv = get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);
     if (ref_frame[rf_idx] == NULL) {
       tpl_stats->ready[rf_idx] = 0;
       continue;
@@ -6111,7 +6100,8 @@ static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx,
     motion_compensated_prediction(
         cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
         ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
-        mi_row, mi_col, tpl_stats, rf_idx);
+        mi_row, mi_col, &mv->as_mv, rf_idx, &tpl_stats->mv_dist[rf_idx],
+        &tpl_stats->mv_cost[rf_idx]);
   }
 }
 
@@ -6353,12 +6343,14 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
             &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
         for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
 #if RE_COMPUTE_MV_INCONSISTENCY
+          MV this_mv =
+              get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_mv;
           MV full_mv;
           int_mv nb_full_mvs[NB_MVS_NUM];
-          prepare_nb_full_mvs(tpl_frame, mi_row, mi_col, rf_idx, bsize,
-                              nb_full_mvs);
-          full_mv.row = this_tpl_stats->mv_arr[rf_idx].as_mv.row >> 3;
-          full_mv.col = this_tpl_stats->mv_arr[rf_idx].as_mv.col >> 3;
+          vp9_prepare_nb_full_mvs(tpl_frame, mi_row, mi_col, rf_idx, bsize,
+                                  nb_full_mvs);
+          full_mv.row = this_mv.row >> 3;
+          full_mv.col = this_mv.col >> 3;
           this_tpl_stats->mv_cost[rf_idx] =
               vp9_nb_mvs_inconsistency(&full_mv, nb_full_mvs, NB_MVS_NUM);
 #endif  // RE_COMPUTE_MV_INCONSISTENCY
@@ -6412,7 +6404,7 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
         if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
           const TplDepStats *tpl_ptr =
               &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
-          int_mv mv = tpl_ptr->mv_arr[idx];
+          int_mv mv = *get_pyramid_mv(tpl_frame, idx, bsize, mi_row, mi_col);
           printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row, mv.as_mv.col);
         }
       }
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 02814599d..5974750cf 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -297,11 +297,14 @@ typedef struct TplDepStats {
   int64_t inter_cost_arr[3];
   int64_t recon_error_arr[3];
   int64_t sse_arr[3];
-  int_mv mv_arr[3];
   double feature_score;
 #endif
 } TplDepStats;
 
+#if CONFIG_NON_GREEDY_MV
+#define SQUARE_BLOCK_SIZES 4
+#endif
+
 typedef struct TplDepFrame {
   uint8_t is_valid;
   TplDepStats *tpl_stats_ptr;
@@ -315,9 +318,36 @@ typedef struct TplDepFrame {
   double lambda;
   double mv_dist_sum[3];
   double mv_cost_sum[3];
+  int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES];
 #endif
 } TplDepFrame;
 
+#if CONFIG_NON_GREEDY_MV
+static INLINE int get_square_block_idx(BLOCK_SIZE bsize) {
+  if (bsize == BLOCK_4X4) {
+    return 0;
+  }
+  if (bsize == BLOCK_8X8) {
+    return 1;
+  }
+  if (bsize == BLOCK_16X16) {
+    return 2;
+  }
+  if (bsize == BLOCK_32X32) {
+    return 3;
+  }
+  printf("ERROR: non-square block size\n");
+  assert(0);
+  return -1;
+}
+
+static INLINE int_mv *get_pyramid_mv(const TplDepFrame *tpl_frame, int rf_idx,
+                                     BLOCK_SIZE bsize, int mi_row, int mi_col) {
+  return &tpl_frame->pyramid_mv_arr[rf_idx][get_square_block_idx(bsize)]
+                                   [mi_row * tpl_frame->stride + mi_col];
+}
+#endif
+
 #define TPL_DEP_COST_SCALE_LOG2 4
 
 // TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index e29e86576..03ac93463 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -1037,23 +1037,28 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
 
     // Other than for the first frame do a motion search.
     if (cm->current_video_frame > 0) {
-      int tmp_err, motion_error, raw_motion_error;
+      int tmp_err, motion_error, this_motion_error, raw_motion_error;
       // Assume 0,0 motion with no mv overhead.
       MV mv = { 0, 0 }, tmp_mv = { 0, 0 };
       struct buf_2d unscaled_last_source_buf_2d;
+      vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
 
       xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
         motion_error = highbd_get_prediction_error(
             bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
+        this_motion_error = highbd_get_prediction_error(
+            bsize, &x->plane[0].src, &xd->plane[0].pre[0], 8);
       } else {
         motion_error =
             get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
+        this_motion_error = motion_error;
       }
 #else
       motion_error =
           get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
+      this_motion_error = motion_error;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
       // Compute the motion error of the 0,0 motion using the last source
@@ -1080,6 +1085,15 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
         // starting point (best reference) for the search.
         first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error);
 
+        v_fn_ptr.vf = get_block_variance_fn(bsize);
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+          v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, 8);
+        }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        this_motion_error =
+            vp9_get_mvpred_var(x, &mv, best_ref_mv, &v_fn_ptr, 0);
+
         // If the current best reference mv is not centered on 0,0 then do a
         // 0,0 based search as well.
         if (!is_zero_mv(best_ref_mv)) {
@@ -1089,6 +1103,8 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
           if (tmp_err < motion_error) {
             motion_error = tmp_err;
             mv = tmp_mv;
+            this_motion_error =
+                vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0);
           }
         }
 
@@ -1275,7 +1291,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
         int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH);
         if (this_intra_error < scaled_low_intra_thresh) {
           fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize);
-          if (motion_error < scaled_low_intra_thresh) {
+          if (this_motion_error < scaled_low_intra_thresh) {
             fp_acc_data->intra_count_low += 1.0;
           } else {
             fp_acc_data->intra_count_high += 1.0;
@@ -2301,9 +2317,9 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
 
     for (idx = 2; idx < MAX_ARF_LAYERS; ++idx) {
       if (arf_depth_boost[idx] == 0) break;
-      arf_depth_bits[idx] =
-          calculate_boost_bits(rc->baseline_gf_interval - total_arfs,
-                               arf_depth_boost[idx], total_group_bits);
+      arf_depth_bits[idx] = calculate_boost_bits(
+          rc->baseline_gf_interval - total_arfs - arf_depth_count[idx],
+          arf_depth_boost[idx], total_group_bits);
 
       total_group_bits -= arf_depth_bits[idx];
       total_arfs += arf_depth_count[idx];
@@ -2675,8 +2691,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
   }
 
   // Calculate the extra bits to be used for boosted frame(s)
-  gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, rc->gfu_boost,
-                                     gf_group_bits);
+  gf_arf_bits = calculate_boost_bits((rc->baseline_gf_interval - 1),
+                                     rc->gfu_boost, gf_group_bits);
 
   // Adjust KF group bits and error remaining.
   twopass->kf_group_error_left -= gf_group_err;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 316227e3c..5a6717ab2 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1879,6 +1879,34 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x,
   }
   return bestsad;
 }
+
+void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
+                             int mi_col, int rf_idx, BLOCK_SIZE bsize,
+                             int_mv *nb_full_mvs) {
+  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+  const int mi_height = num_8x8_blocks_high_lookup[bsize];
+  const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+  int i;
+  for (i = 0; i < NB_MVS_NUM; ++i) {
+    int r = dirs[i][0] * mi_height;
+    int c = dirs[i][1] * mi_width;
+    if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
+        mi_col + c < tpl_frame->mi_cols) {
+      const TplDepStats *tpl_ptr =
+          &tpl_frame
+               ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
+      int_mv *mv =
+          get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
+      if (tpl_ptr->ready[rf_idx]) {
+        nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);
+      } else {
+        nb_full_mvs[i].as_int = INVALID_MV;
+      }
+    } else {
+      nb_full_mvs[i].as_int = INVALID_MV;
+    }
+  }
+}
 #endif  // CONFIG_NON_GREEDY_MV
 
 int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 6d89fdfdd..ab69afdcd 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -143,6 +143,11 @@ static INLINE MV get_full_mv(const MV *mv) {
   out_mv.col = mv->col >> 3;
   return out_mv;
 }
+
+struct TplDepFrame;
+void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
+                             int mi_col, int rf_idx, BLOCK_SIZE bsize,
+                             int_mv *nb_full_mvs);
 #endif  // CONFIG_NON_GREEDY_MV
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index fe8f24444..babfe4a33 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -233,10 +233,10 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   if (rv && search_subpel) {
-    int subpel_force_stop = cpi->sf.mv.subpel_force_stop;
-    if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2;
+    SUBPEL_FORCE_STOP subpel_force_stop = cpi->sf.mv.subpel_force_stop;
+    if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = HALF_PEL;
     if (cpi->sf.mv.enable_adaptive_subpel_force_stop) {
-      int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh;
+      const int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh;
       if (abs(tmp_mv->as_mv.row) >= mv_thresh ||
           abs(tmp_mv->as_mv.col) >= mv_thresh)
         subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index b5c002aea..5ad68e2e5 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -417,6 +417,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
 
   for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
     rc->rate_correction_factors[i] = 1.0;
+    rc->damped_adjustment[i] = 0;
   }
 
   rc->min_gf_interval = oxcf->min_gf_interval;
@@ -720,6 +721,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) {
   int correction_factor = 100;
   double rate_correction_factor = get_rate_correction_factor(cpi);
   double adjustment_limit;
+  RATE_FACTOR_LEVEL rf_lvl =
+      cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
 
   int projected_size_based_on_q = 0;
 
@@ -746,10 +749,16 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) {
     correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) /
                               projected_size_based_on_q);
 
-  // More heavily damped adjustment used if we have been oscillating either side
-  // of target.
-  adjustment_limit =
-      0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor)));
+  // Do not use damped adjustment for the first frame of each frame type
+  if (!cpi->rc.damped_adjustment[rf_lvl]) {
+    adjustment_limit = 1.0;
+    cpi->rc.damped_adjustment[rf_lvl] = 1;
+  } else {
+    // More heavily damped adjustment used if we have been oscillating either
+    // side of target.
+    adjustment_limit =
+        0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor)));
+  }
 
   cpi->rc.q_2_frame = cpi->rc.q_1_frame;
   cpi->rc.q_1_frame = cm->base_qindex;
@@ -1403,6 +1412,10 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
   int active_worst_quality = cpi->twopass.active_worst_quality;
   int q;
   int *inter_minq;
+  const int boost_frame =
+      !rc->is_src_frame_alt_ref &&
+      (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
+
   ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
 
   if (oxcf->rc_mode == VPX_Q)
@@ -1410,8 +1423,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
 
   if (frame_is_intra_only(cm)) {
     pick_kf_q_bound_two_pass(cpi, &active_best_quality, &active_worst_quality);
-  } else if (!rc->is_src_frame_alt_ref &&
-             (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+  } else if (boost_frame) {
     // Use the lower of active_worst_quality and recent
     // average Q as basis for GF/ARF best Q limit unless last frame was
     // a key frame.
@@ -1455,9 +1467,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
 
   // Extension to max or min Q if undershoot or overshoot is outside
   // the permitted range.
-  if (frame_is_intra_only(cm) ||
-      (!rc->is_src_frame_alt_ref &&
-       (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
+  if (frame_is_intra_only(cm) || boost_frame) {
     active_best_quality -=
         (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
     active_worst_quality += (cpi->twopass.extend_maxq / 2);
@@ -1465,13 +1475,9 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
     active_best_quality -=
         (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
     active_worst_quality += cpi->twopass.extend_maxq;
-  }
 
-  // For normal frames do not allow an active minq lower than the q used for
-  // the last boosted frame.
-  if (!frame_is_intra_only(cm) &&
-      (!(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) ||
-       rc->is_src_frame_alt_ref)) {
+    // For normal frames do not allow an active minq lower than the q used for
+    // the last boosted frame.
     active_best_quality = VPXMAX(active_best_quality, rc->last_boosted_qindex);
   }
 
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index a343bd34b..a5c1f4cf0 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -195,6 +195,8 @@ typedef struct {
   int use_post_encode_drop;
   // External flag to enable post encode frame dropping, controlled by user.
   int ext_use_post_encode_drop;
+
+  int damped_adjustment[RATE_FACTOR_LEVELS];
 } RATE_CONTROL;
 
 struct VP9_COMP;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index b0983281d..e4a5f3e18 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -532,7 +532,7 @@ static void set_rt_speed_feature_framesize_independent(
     sf->adjust_partitioning_from_last_frame =
         cm->last_frame_type != cm->frame_type ||
         (0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
-    sf->mv.subpel_force_stop = 1;
+    sf->mv.subpel_force_stop = QUARTER_PEL;
     for (i = 0; i < TX_SIZES; i++) {
       sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
       sf->intra_uv_mode_mask[i] = INTRA_DC;
@@ -709,7 +709,6 @@ static void set_rt_speed_feature_framesize_independent(
     // For SVC: enable use of lower resolution partition for higher resolution,
     // only for 3 spatial layers and when config/top resolution is above VGA.
     // Enable only for non-base temporal layer frames.
-    // TODO(jianj): Investigate webm:1578
     if (cpi->use_svc && cpi->svc.use_partition_reuse &&
         cpi->svc.number_spatial_layers == 3 && cpi->svc.temporal_layer_id > 0 &&
         cpi->oxcf.width * cpi->oxcf.height > 640 * 480)
@@ -730,7 +729,7 @@ static void set_rt_speed_feature_framesize_independent(
     if (cpi->row_mt && cpi->oxcf.max_threads > 1)
       sf->adaptive_rd_thresh_row_mt = 1;
 
-    if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = 3;
+    if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = FULL_PEL;
     if (content == VP9E_CONTENT_SCREEN) sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
     // Only keep INTRA_DC mode for speed 8.
     if (!is_keyframe) {
@@ -766,8 +765,8 @@ static void set_rt_speed_feature_framesize_independent(
     sf->mv.adapt_subpel_force_stop.mv_thresh = 2;
     if (cpi->rc.avg_frame_low_motion < 40)
       sf->mv.adapt_subpel_force_stop.mv_thresh = 1;
-    sf->mv.adapt_subpel_force_stop.force_stop_below = 1;
-    sf->mv.adapt_subpel_force_stop.force_stop_above = 2;
+    sf->mv.adapt_subpel_force_stop.force_stop_below = QUARTER_PEL;
+    sf->mv.adapt_subpel_force_stop.force_stop_above = HALF_PEL;
     // Disable partition blocks below 16x16, except for low-resolutions.
     if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240)
       sf->disable_16x16part_nonkey = 1;
@@ -796,18 +795,11 @@ static void set_rt_speed_feature_framesize_independent(
   }
   // Special case for screen content: increase motion search on base spatial
   // layer when high motion is detected or previous SL0 frame was dropped.
-  // Avoid speed 5 for as there is an issue with SVC datarate test.
-  // TODO(marpan/jianj): Investigate issue at speed 5.
-  if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed > 5 &&
+  if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed >= 5 &&
       cpi->svc.spatial_layer_id == 0 &&
       (cpi->rc.high_num_blocks_with_motion || cpi->svc.last_layer_dropped[0])) {
     sf->mv.search_method = NSTEP;
     sf->mv.fullpel_search_step_param = 2;
-    // TODO(marpan/jianj): Investigate issue for lower setting of step_param
-    // for spatial layers (namely on lower layers).
-    if (cpi->use_svc && cm->width != cpi->oxcf.width &&
-        cm->height != cpi->oxcf.height)
-      sf->mv.fullpel_search_step_param = 4;
   }
 }
 
@@ -854,12 +846,6 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
   if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact &&
       oxcf->max_threads > 1)
     sf->adaptive_rd_thresh = 0;
-
-  // This is only used in motion vector unit test.
-  if (cpi->oxcf.motion_vector_unit_test == 1)
-    cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
-  else if (cpi->oxcf.motion_vector_unit_test == 2)
-    cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
 }
 
 void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
@@ -875,7 +861,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
   sf->recode_loop = ALLOW_RECODE_FIRST;
   sf->mv.subpel_search_method = SUBPEL_TREE;
   sf->mv.subpel_search_level = 2;
-  sf->mv.subpel_force_stop = 0;
+  sf->mv.subpel_force_stop = EIGHTH_PEL;
   sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf);
   sf->mv.reduce_first_step_size = 0;
   sf->coeff_prob_appx_step = 1;
@@ -992,7 +978,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
     sf->optimize_coefficients = 0;
   }
 
-  if (sf->mv.subpel_force_stop == 3) {
+  if (sf->mv.subpel_force_stop == FULL_PEL) {
     // Whole pel only
     cpi->find_fractional_mv_step = vp9_skip_sub_pixel_tree;
   } else if (sf->mv.subpel_search_method == SUBPEL_TREE) {
@@ -1005,6 +991,12 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
     cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_evenmore;
   }
 
+  // This is only used in motion vector unit test.
+  if (cpi->oxcf.motion_vector_unit_test == 1)
+    cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
+  else if (cpi->oxcf.motion_vector_unit_test == 2)
+    cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
+
   x->optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
 
   x->min_partition_size = sf->default_min_partition_size;
@@ -1022,10 +1014,4 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
   if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact &&
       oxcf->max_threads > 1)
     sf->adaptive_rd_thresh = 0;
-
-  // This is only used in motion vector unit test.
-  if (cpi->oxcf.motion_vector_unit_test == 1)
-    cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
-  else if (cpi->oxcf.motion_vector_unit_test == 2)
-    cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
 }
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 02673e602..9b09ec474 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -167,15 +167,17 @@ typedef enum {
   ONE_LOOP_REDUCED = 1
 } FAST_COEFF_UPDATE;
 
+typedef enum { EIGHTH_PEL, QUARTER_PEL, HALF_PEL, FULL_PEL } SUBPEL_FORCE_STOP;
+
 typedef struct ADAPT_SUBPEL_FORCE_STOP {
   // Threshold for full pixel motion vector;
   int mv_thresh;
 
   // subpel_force_stop if full pixel MV is below the threshold.
-  int force_stop_below;
+  SUBPEL_FORCE_STOP force_stop_below;
 
   // subpel_force_stop if full pixel MV is equal to or above the threshold.
-  int force_stop_above;
+  SUBPEL_FORCE_STOP force_stop_above;
 } ADAPT_SUBPEL_FORCE_STOP;
 
 typedef struct MV_SPEED_FEATURES {
@@ -200,12 +202,8 @@ typedef struct MV_SPEED_FEATURES {
   // extensive subpel search.
   int subpel_search_level;
 
-  // Control when to stop subpel search:
-  // 0: Full subpel search.
-  // 1: Stop at quarter pixel.
-  // 2: Stop at half pixel.
-  // 3: Stop at full pixel.
-  int subpel_force_stop;
+  // When to stop subpel motion search.
+  SUBPEL_FORCE_STOP subpel_force_stop;
 
   // If it's enabled, different subpel_force_stop will be used for different MV.
   int enable_adaptive_subpel_force_stop;