diff options
Diffstat (limited to 'vp9/encoder/vp9_encodeframe.c')
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 99 |
1 file changed, 62 insertions(+), 37 deletions(-)
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 5adefac1a..236567f94 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3440,18 +3440,59 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, #undef FEATURES #undef LABELS +// Perform fast and coarse motion search for the given block. This is a +// pre-processing step for the ML based partition search speedup. +static void simple_motion_search(const VP9_COMP *const cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, int mi_row, int mi_col, + MV ref_mv, MV_REFERENCE_FRAME ref, + uint8_t *const pred_buf) { + const VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mi = xd->mi[0]; + const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_buffer(cpi, ref); + const int step_param = 1; + const MvLimits tmp_mv_limits = x->mv_limits; + const SEARCH_METHODS search_method = NSTEP; + const int sadpb = x->sadperbit16; + MV ref_mv_full = { ref_mv.row >> 3, ref_mv.col >> 3 }; + MV best_mv = { 0, 0 }; + int cost_list[5]; + + assert(yv12 != NULL); + if (!yv12) return; + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, + &cm->frame_refs[ref - 1].sf); + mi->ref_frame[0] = ref; + mi->ref_frame[1] = NONE; + mi->sb_type = bsize; + vp9_set_mv_search_range(&x->mv_limits, &ref_mv); + vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_method, + sadpb, cond_cost_list(cpi, cost_list), &ref_mv, + &best_mv, 0, 0); + best_mv.row *= 8; + best_mv.col *= 8; + x->mv_limits = tmp_mv_limits; + mi->mv[0].as_mv = best_mv; + + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); + xd->plane[0].dst.buf = pred_buf; + xd->plane[0].dst.stride = 64; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); +} + // Use a neural net model to prune partition-none and partition-split search. // The model uses prediction residue variance and quantization step size as // input features. 
#define FEATURES 6 -static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, +static void ml_predict_var_rd_paritioning(const VP9_COMP *const cpi, + MACROBLOCK *const x, + PC_TREE *const pc_tree, BLOCK_SIZE bsize, int mi_row, int mi_col, int *none, int *split) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi[0]; + const VP9_COMMON *const cm = &cpi->common; const NN_CONFIG *nn_config = NULL; #if CONFIG_VP9_HIGHBITDEPTH + MACROBLOCKD *xd = &x->e_mbd; DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? (CONVERT_TO_BYTEPTR(pred_buffer)) @@ -3489,41 +3530,20 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, if (!nn_config) return; - mi->ref_frame[1] = NONE; - mi->sb_type = bsize; // Do a simple single motion search to find a prediction for current block. // The variance of the residue will be used as input features. { + MV ref_mv; const MV_REFERENCE_FRAME ref = cpi->rc.is_src_frame_alt_ref ? 
ALTREF_FRAME : LAST_FRAME; - YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref); - MV ref_mv = { 0, 0 }; - MV ref_mv_full = { 0, 0 }; - const int step_param = 1; - const MvLimits tmp_mv_limits = x->mv_limits; - const SEARCH_METHODS search_method = NSTEP; - const int sadpb = x->sadperbit16; - MV best_mv = { 0, 0 }; - int cost_list[5]; - - assert(yv12 != NULL); - if (!yv12) return; - vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, - &cm->frame_refs[ref - 1].sf); - mi->ref_frame[0] = ref; - vp9_set_mv_search_range(&x->mv_limits, &ref_mv); - vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, - search_method, sadpb, cond_cost_list(cpi, cost_list), - &ref_mv, &best_mv, 0, 0); - best_mv.row *= 8; - best_mv.col *= 8; - x->mv_limits = tmp_mv_limits; - mi->mv[0].as_mv = best_mv; - - set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); - xd->plane[0].dst.buf = pred_buf; - xd->plane[0].dst.stride = 64; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + // If bsize is 64x64, use zero MV as reference; otherwise, use MV result + // of previous(larger) block as reference. 
+ if (bsize == BLOCK_64X64) + ref_mv.row = ref_mv.col = 0; + else + ref_mv = pc_tree->mv; + simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf); + pc_tree->mv = x->e_mbd.mi[0]->mv[0].as_mv; } vpx_clear_system_state(); @@ -3818,14 +3838,19 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->partitioning = PARTITION_NONE; - if (cpi->sf.ml_var_partition_pruning) { + if (cpi->sf.ml_var_partition_pruning && !frame_is_intra_only(cm)) { const int do_ml_var_partition_pruning = - !frame_is_intra_only(cm) && partition_none_allowed && do_split && + partition_none_allowed && do_split && mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows && mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols; if (do_ml_var_partition_pruning) { - ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col, + ml_predict_var_rd_paritioning(cpi, x, pc_tree, bsize, mi_row, mi_col, &partition_none_allowed, &do_split); + } else { + vp9_zero(pc_tree->mv); + } + if (bsize > BLOCK_8X8) { // Store MV result as reference for subblocks. + for (i = 0; i < 4; ++i) pc_tree->split[i]->mv = pc_tree->mv; } } |