diff options
author | Jim Bankoski <jimbankoski@google.com> | 2016-12-12 16:27:21 -0800 |
---|---|---|
committer | James Zern <jzern@google.com> | 2016-12-13 02:11:34 +0000 |
commit | 85a541a421894981cc91ea198faf74eb9146cece (patch) | |
tree | 775217b6213c4bb121151813b0f290ca4eed1a81 /vp8/decoder | |
parent | 121e161115d86c65101ec9f1ec1564cdd9e58598 (diff) | |
download | libvpx-85a541a421894981cc91ea198faf74eb9146cece.tar libvpx-85a541a421894981cc91ea198faf74eb9146cece.tar.gz libvpx-85a541a421894981cc91ea198faf74eb9146cece.tar.bz2 libvpx-85a541a421894981cc91ea198faf74eb9146cece.zip |
Reapply 'Amend and improve VP8 multithreading implementation'
Reapply this patch:
ff0107f Amend and improve VP8 multithreading implementation
Amended the patch to add a unit test and fix an ASan (AddressSanitizer) error.
BUG=webm:851
Change-Id: I6572c03256169c64e80248bf5a5e99f59a2fc93c
Diffstat (limited to 'vp8/decoder')
-rw-r--r-- | vp8/decoder/onyxd_int.h | 5 | ||||
-rw-r--r-- | vp8/decoder/threading.c | 58 |
2 files changed, 44 insertions, 19 deletions
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index e50fafd4f..88b1ff16b 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -67,7 +67,8 @@ typedef struct VP8D_COMP { #if CONFIG_MULTITHREAD /* variable for threading */ - volatile int b_multithreaded_rd; + + int b_multithreaded_rd; int max_threads; int current_mb_col_main; unsigned int decoding_thread_count; @@ -76,6 +77,8 @@ typedef struct VP8D_COMP { int mt_baseline_filter_level[MAX_MB_SEGMENTS]; int sync_range; int *mt_current_mb_col; /* Each row remembers its already decoded column. */ + pthread_mutex_t *pmutex; + pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */ unsigned char **mt_yabove_row; /* mb_rows x width */ unsigned char **mt_uabove_row; diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index 44ca16bfd..5b6200f67 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -50,9 +50,6 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; - mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1); - mbd->mode_info_stride = pc->mode_info_stride; - mbd->frame_type = pc->frame_type; mbd->pre = xd->pre; mbd->dst = xd->dst; @@ -251,8 +248,8 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) { - volatile const int *last_row_current_mb_col; - volatile int *current_mb_col; + const int *last_row_current_mb_col; + int *current_mb_col; int mb_row; VP8_COMMON *pc = &pbi->common; const int nsync = pbi->sync_range; @@ -289,6 +286,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, xd->up_available = (start_mb_row != 0); + xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row; + xd->mode_info_stride = pc->mode_info_stride; + for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += 
(pbi->decoding_thread_count + 1)) { int recon_yoffset, recon_uvoffset; @@ -355,14 +355,15 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, xd->dst.uv_stride); } - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) { - *current_mb_col = mb_col - 1; + for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) { + if (((mb_col - 1) % nsync) == 0) { + pthread_mutex_t *mutex = &pbi->pmutex[mb_row]; + protected_write(mutex, current_mb_col, mb_col - 1); + } - if ((mb_col & (nsync - 1)) == 0) { - while (mb_col > (*last_row_current_mb_col - nsync)) { - x86_pause_hint(); - thread_sleep(0); - } + if (mb_row && !(mb_col & (nsync - 1))) { + pthread_mutex_t *mutex = &pbi->pmutex[mb_row - 1]; + sync_read(mutex, mb_col, last_row_current_mb_col, nsync); } /* Distance of MB to the various image edges. @@ -548,7 +549,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, } /* last MB of row is ready just after extension is done */ - *current_mb_col = mb_col + nsync; + protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync); ++xd->mode_info_context; /* skip prediction column */ xd->up_available = 1; @@ -568,10 +569,10 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) { ENTROPY_CONTEXT_PLANES mb_row_left_context; while (1) { - if (pbi->b_multithreaded_rd == 0) break; + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) break; if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) { - if (pbi->b_multithreaded_rd == 0) { + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) { break; } else { MACROBLOCKD *xd = &mbrd->mbd; @@ -591,6 +592,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) { pbi->b_multithreaded_rd = 0; pbi->allocated_decoding_thread_count = 0; + pthread_mutex_init(&pbi->mt_mutex, NULL); /* limit decoding threads to the max number of token partitions */ core_count = (pbi->max_threads > 8) ? 
8 : pbi->max_threads; @@ -647,6 +649,16 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) { void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) { int i; + /* De-allocate mutex */ + if (pbi->pmutex != NULL) { + for (i = 0; i < mb_rows; ++i) { + pthread_mutex_destroy(&pbi->pmutex[i]); + } + + vpx_free(pbi->pmutex); + pbi->pmutex = NULL; + } + vpx_free(pbi->mt_current_mb_col); pbi->mt_current_mb_col = NULL; @@ -712,7 +724,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) { int i; int uv_width; - if (pbi->b_multithreaded_rd) { + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) { vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows); /* our internal buffers are always multiples of 16 */ @@ -730,6 +742,15 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) { uv_width = width >> 1; + /* Allocate mutex */ + CHECK_MEM_ERROR(pbi->pmutex, + vpx_malloc(sizeof(*pbi->pmutex) * pc->mb_rows)); + if (pbi->pmutex) { + for (i = 0; i < pc->mb_rows; ++i) { + pthread_mutex_init(&pbi->pmutex[i], NULL); + } + } + /* Allocate an int for each mb row. */ CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows); @@ -772,9 +793,9 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) { void vp8_decoder_remove_threads(VP8D_COMP *pbi) { /* shutdown MB Decoding thread; */ - if (pbi->b_multithreaded_rd) { + if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) { int i; - pbi->b_multithreaded_rd = 0; + protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0); /* allow all threads to exit */ for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) { @@ -804,6 +825,7 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) { vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); } + pthread_mutex_destroy(&pbi->mt_mutex); } void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) { |