From e8b2750904fd78cf168fa196b38d35594b060e8a Mon Sep 17 00:00:00 2001 From: James Zern Date: Sat, 6 Apr 2019 17:07:13 -0700 Subject: loop_filter_rows_mt: unify worker count calculation fixes a deadlock with an odd number of threads that go from < number of tiles to >. the previous calculations were out of sync so going from e.g., 8 tiles to 2 with 3 threads would result in scheduling only 2 workers, but thread_loop_filter_rows() would expect 3. BUG=webm:1618 Change-Id: I78c967a8c3c927d929e13c949808a5ef443ebacb --- test/vp9_end_to_end_test.cc | 59 ++++++++++++++++++++++++++++++++++++++++++ vp9/common/vp9_thread_common.c | 7 ++++- vp9/common/vp9_thread_common.h | 3 ++- 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/test/vp9_end_to_end_test.cc b/test/vp9_end_to_end_test.cc index b4acd8ff9..7cb716f22 100644 --- a/test/vp9_end_to_end_test.cc +++ b/test/vp9_end_to_end_test.cc @@ -193,6 +193,50 @@ class EndToEndTestLarge libvpx_test::TestMode encoding_mode_; }; +#if CONFIG_VP9_DECODER +// The test parameters control VP9D_SET_LOOP_FILTER_OPT and the number of +// decoder threads. 
+class EndToEndTestLoopFilterThreading + : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params<bool, int> { + protected: + EndToEndTestLoopFilterThreading() + : EncoderTest(GET_PARAM(0)), use_loop_filter_opt_(GET_PARAM(1)) {} + + virtual ~EndToEndTestLoopFilterThreading() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(::libvpx_test::kRealTime); + cfg_.g_threads = 2; + cfg_.g_lag_in_frames = 0; + cfg_.rc_target_bitrate = 500; + cfg_.rc_end_usage = VPX_CBR; + cfg_.kf_min_dist = 1; + cfg_.kf_max_dist = 1; + dec_cfg_.threads = GET_PARAM(2); + } + + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(VP8E_SET_CPUUSED, 8); + } + encoder->Control(VP9E_SET_TILE_COLUMNS, 4 - video->frame() % 5); + } + + virtual void PreDecodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Decoder *decoder) { + if (video->frame() == 0) { + decoder->Control(VP9D_SET_LOOP_FILTER_OPT, use_loop_filter_opt_ ? 
1 : 0); + } + } + + private: + const bool use_loop_filter_opt_; +}; +#endif // CONFIG_VP9_DECODER + TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) { cfg_.rc_target_bitrate = kBitrate; cfg_.g_error_resilient = 0; @@ -255,6 +299,16 @@ TEST_P(EndToEndTestAdaptiveRDThresh, EndtoEndAdaptiveRDThreshRowMT) { ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } +#if CONFIG_VP9_DECODER +TEST_P(EndToEndTestLoopFilterThreading, TileCountChange) { + ::libvpx_test::RandomVideoSource video; + video.SetSize(4096, 2160); + video.set_limit(10); + + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); +} +#endif // CONFIG_VP9_DECODER + VP9_INSTANTIATE_TEST_CASE(EndToEndTestLarge, ::testing::ValuesIn(kEncodingModeVectors), ::testing::ValuesIn(kTestVectors), @@ -262,4 +316,9 @@ VP9_INSTANTIATE_TEST_CASE(EndToEndTestLarge, VP9_INSTANTIATE_TEST_CASE(EndToEndTestAdaptiveRDThresh, ::testing::Values(5, 6, 7), ::testing::Values(8, 9)); + +#if CONFIG_VP9_DECODER +VP9_INSTANTIATE_TEST_CASE(EndToEndTestLoopFilterThreading, ::testing::Bool(), + ::testing::Range(2, 6)); +#endif // CONFIG_VP9_DECODER } // namespace diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c index 00882a5f9..c79d9b7f0 100644 --- a/vp9/common/vp9_thread_common.c +++ b/vp9/common/vp9_thread_common.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <assert.h> #include <limits.h> #include "./vpx_config.h" #include "vpx_dsp/vpx_dsp_common.h" @@ -92,7 +93,7 @@ static INLINE void thread_loop_filter_rows( int y_only, VP9LfSync *const lf_sync) { const int num_planes = y_only ? 
1 : MAX_MB_PLANE; const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; - const int num_active_workers = VPXMIN(lf_sync->num_workers, lf_sync->rows); + const int num_active_workers = lf_sync->num_active_workers; int mi_row, mi_col; enum lf_path path; if (y_only) @@ -104,6 +105,8 @@ static INLINE void thread_loop_filter_rows( else path = LF_PATH_SLOW; + assert(num_active_workers > 0); + for (mi_row = start; mi_row < stop; mi_row += num_active_workers * MI_BLOCK_SIZE) { MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; @@ -172,6 +175,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm, vp9_loop_filter_dealloc(lf_sync); vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); } + lf_sync->num_active_workers = num_workers; // Initialize cur_sb_col to -1 for all SB rows. memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); @@ -319,6 +323,7 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, CHECK_MEM_ERROR(cm, lf_sync->lfdata, vpx_malloc(num_workers * sizeof(*lf_sync->lfdata))); lf_sync->num_workers = num_workers; + lf_sync->num_active_workers = lf_sync->num_workers; CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); diff --git a/vp9/common/vp9_thread_common.h b/vp9/common/vp9_thread_common.h index 1a2d79abd..94c9de659 100644 --- a/vp9/common/vp9_thread_common.h +++ b/vp9/common/vp9_thread_common.h @@ -36,7 +36,8 @@ typedef struct VP9LfSyncData { // Row-based parallel loopfilter data LFWorkerData *lfdata; - int num_workers; + int num_workers; // number of allocated workers. + int num_active_workers; // number of scheduled workers. #if CONFIG_MULTITHREAD pthread_mutex_t lf_mutex; -- cgit v1.2.3