diff options
author | Jerome Jiang <jianj@google.com> | 2018-03-29 14:59:58 -0700 |
---|---|---|
committer | Marco Paniconi <marpan@google.com> | 2018-05-14 08:47:18 -0700 |
commit | 66aca163f5017fc50276f1c3025f2b2cc08aa010 (patch) | |
tree | 4ecbd96568544e628beceb8bb50c77581d006254 | |
parent | c85c5337bfe2f64503b8e8f4584db63d6cd64d61 (diff) | |
download | libvpx-66aca163f5017fc50276f1c3025f2b2cc08aa010.tar libvpx-66aca163f5017fc50276f1c3025f2b2cc08aa010.tar.gz libvpx-66aca163f5017fc50276f1c3025f2b2cc08aa010.tar.bz2 libvpx-66aca163f5017fc50276f1c3025f2b2cc08aa010.zip |
VP9: Add speed 9 for subpel search.
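Set the subpel search force-stop level to 2 when the full-pixel motion vector is at or above a threshold (row or column magnitude >= 2 at speed 9), and to 1 otherwise.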
10% speedup on 1 and 2 threads on Samsung Galaxy S8+.
Change-Id: I7323bb913000229cf60a37495bf88bcc51d0ac96
-rw-r--r-- | test/cpu_speed_test.cc | 2 | ||||
-rw-r--r-- | test/encode_perf_test.cc | 2 | ||||
-rw-r--r-- | test/svc_datarate_test.cc | 13 | ||||
-rw-r--r-- | test/vp9_datarate_test.cc | 9 | ||||
-rw-r--r-- | test/vp9_ethread_test.cc | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 16 | ||||
-rw-r--r-- | vp9/vp9_cx_iface.c | 2 |
9 files changed, 48 insertions, 14 deletions
diff --git a/test/cpu_speed_test.cc b/test/cpu_speed_test.cc index 404b5b44f..34e35b065 100644 --- a/test/cpu_speed_test.cc +++ b/test/cpu_speed_test.cc @@ -152,5 +152,5 @@ VP9_INSTANTIATE_TEST_CASE(CpuSpeedTest, ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), - ::testing::Range(0, 9)); + ::testing::Range(0, 10)); } // namespace diff --git a/test/encode_perf_test.cc b/test/encode_perf_test.cc index 0bb435502..142d9e2da 100644 --- a/test/encode_perf_test.cc +++ b/test/encode_perf_test.cc @@ -48,7 +48,7 @@ const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = { EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470), }; -const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 }; +const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8, 9 }; const int kEncodePerfTestThreads[] = { 1, 2, 4 }; #define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0])) diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc index 608f27ebf..5156db750 100644 --- a/test/svc_datarate_test.cc +++ b/test/svc_datarate_test.cc @@ -1150,20 +1150,21 @@ TEST_P(DatarateOnePassCbrSvcSmallKF, OnePassCbrSvc2SL3TLSmallKf) { } VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSingleBR, - ::testing::Range(5, 9)); + ::testing::Range(5, 10)); -VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 9), +VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 10), ::testing::Range(0, 3)); VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcFrameDropMultiBR, - ::testing::Range(5, 9), ::testing::Range(0, 2), + ::testing::Range(5, 10), ::testing::Range(0, 2), ::testing::Range(0, 3)); #if CONFIG_VP9_TEMPORAL_DENOISING -VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser, ::testing::Range(5, 9), - ::testing::Range(1, 3), ::testing::Range(0, 3)); +VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser, + ::testing::Range(5, 10), ::testing::Range(1, 3), + ::testing::Range(0, 3)); #endif 
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 9), +VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 10), ::testing::Range(32, 36)); } // namespace diff --git a/test/vp9_datarate_test.cc b/test/vp9_datarate_test.cc index c4dbcacbe..cb3aad839 100644 --- a/test/vp9_datarate_test.cc +++ b/test/vp9_datarate_test.cc @@ -824,16 +824,17 @@ TEST_P(DatarateTestVP9LargeDenoiser, DenoiserOffOn) { VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large, ::testing::Values(::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), - ::testing::Range(2, 9), ::testing::Range(0, 4)); + ::testing::Range(2, 10), ::testing::Range(0, 4)); VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeOneBR, ::testing::Values(::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), - ::testing::Range(2, 9)); + ::testing::Range(2, 10)); -VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 9)); +VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 10)); #if CONFIG_VP9_TEMPORAL_DENOISING -VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser, ::testing::Range(5, 9)); +VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser, + ::testing::Range(5, 10)); #endif } // namespace diff --git a/test/vp9_ethread_test.cc b/test/vp9_ethread_test.cc index 6b7e51211..44659904f 100644 --- a/test/vp9_ethread_test.cc +++ b/test/vp9_ethread_test.cc @@ -409,7 +409,7 @@ INSTANTIATE_TEST_CASE_P( ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), - ::testing::Range(3, 9), // cpu_used + ::testing::Range(3, 10), // cpu_used ::testing::Range(0, 3), // tile_columns ::testing::Range(2, 5))); // threads diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index f7d420bc7..f86c9f092 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -224,6 +224,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (rv && search_subpel) { int subpel_force_stop = 
cpi->sf.mv.subpel_force_stop; if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2; + if (cpi->sf.mv.enable_adaptive_subpel_force_stop) { + int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh; + if (abs(tmp_mv->as_mv.row) >= mv_thresh || + abs(tmp_mv->as_mv.col) >= mv_thresh) + subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above; + else + subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below; + } cpi->find_fractional_mv_step( x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop, diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 90da68726..05ec3c612 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -661,6 +661,14 @@ static void set_rt_speed_feature_framesize_independent( sf->limit_newmv_early_exit = 0; sf->use_simple_block_yrd = 1; } + + if (speed >= 9) { + sf->mv.enable_adaptive_subpel_force_stop = 1; + sf->mv.adapt_subpel_force_stop.mv_thresh = 2; + sf->mv.adapt_subpel_force_stop.force_stop_below = 1; + sf->mv.adapt_subpel_force_stop.force_stop_above = 2; + } + if (sf->use_altref_onepass) { if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) { sf->partition_search_type = FIXED_PARTITION; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 946bf0545..8595e54ab 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -161,6 +161,17 @@ typedef enum { ONE_LOOP_REDUCED = 1 } FAST_COEFF_UPDATE; +typedef struct ADAPT_SUBPEL_FORCE_STOP { + // Threshold for full pixel motion vector; + int mv_thresh; + + // subpel_force_stop if full pixel MV is below the threshold. + int force_stop_below; + + // subpel_force_stop if full pixel MV is equal to or above the threshold. 
+ int force_stop_above; +} ADAPT_SUBPEL_FORCE_STOP; + typedef struct MV_SPEED_FEATURES { // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). SEARCH_METHODS search_method; @@ -189,6 +200,11 @@ typedef struct MV_SPEED_FEATURES { // 3: Stop at full pixel. int subpel_force_stop; + // If it's enabled, different subpel_force_stop will be used for different MV. + int enable_adaptive_subpel_force_stop; + + ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop; + // This variable sets the step_param used in full pel motion search. int fullpel_search_step_param; } MV_SPEED_FEATURES; diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 5eaa7a18a..d6c6ece91 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -248,7 +248,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(extra_cfg, row_mt, 0, 1); RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2); RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2); - RANGE_CHECK(extra_cfg, cpu_used, -8, 8); + RANGE_CHECK(extra_cfg, cpu_used, -9, 9); RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6); RANGE_CHECK(extra_cfg, tile_columns, 0, 6); RANGE_CHECK(extra_cfg, tile_rows, 0, 2); |