summary | refs | log | tree | commit | diff
path: root/vp8/decoder
diff options
context:
space:
mode:
author: Jim Bankoski <jimbankoski@google.com> 2016-12-12 16:27:21 -0800
committer: James Zern <jzern@google.com> 2016-12-13 02:11:34 +0000
commit: 85a541a421894981cc91ea198faf74eb9146cece (patch)
tree: 775217b6213c4bb121151813b0f290ca4eed1a81 /vp8/decoder
parent: 121e161115d86c65101ec9f1ec1564cdd9e58598 (diff)
download: libvpx-85a541a421894981cc91ea198faf74eb9146cece.tar
libvpx-85a541a421894981cc91ea198faf74eb9146cece.tar.gz
libvpx-85a541a421894981cc91ea198faf74eb9146cece.tar.bz2
libvpx-85a541a421894981cc91ea198faf74eb9146cece.zip
Reapply 'Amend and improve VP8 multithreading implementation'
Reapply this patch:

  ff0107f Amend and improve VP8 multithreading implementation

Amended the patch to add a unit test and fix an ASan error.

BUG=webm:851
Change-Id: I6572c03256169c64e80248bf5a5e99f59a2fc93c
Diffstat (limited to 'vp8/decoder')
-rw-r--r--  vp8/decoder/onyxd_int.h  |  5
-rw-r--r--  vp8/decoder/threading.c  | 58
2 files changed, 44 insertions, 19 deletions
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index e50fafd4f..88b1ff16b 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -67,7 +67,8 @@ typedef struct VP8D_COMP {
#if CONFIG_MULTITHREAD
/* variable for threading */
- volatile int b_multithreaded_rd;
+
+ int b_multithreaded_rd;
int max_threads;
int current_mb_col_main;
unsigned int decoding_thread_count;
@@ -76,6 +77,8 @@ typedef struct VP8D_COMP {
int mt_baseline_filter_level[MAX_MB_SEGMENTS];
int sync_range;
int *mt_current_mb_col; /* Each row remembers its already decoded column. */
+ pthread_mutex_t *pmutex;
+ pthread_mutex_t mt_mutex; /* mutex for b_multithreaded_rd */
unsigned char **mt_yabove_row; /* mb_rows x width */
unsigned char **mt_uabove_row;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 44ca16bfd..5b6200f67 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -50,9 +50,6 @@ static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
mbd->subpixel_predict8x8 = xd->subpixel_predict8x8;
mbd->subpixel_predict16x16 = xd->subpixel_predict16x16;
- mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1);
- mbd->mode_info_stride = pc->mode_info_stride;
-
mbd->frame_type = pc->frame_type;
mbd->pre = xd->pre;
mbd->dst = xd->dst;
@@ -251,8 +248,8 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
int start_mb_row) {
- volatile const int *last_row_current_mb_col;
- volatile int *current_mb_col;
+ const int *last_row_current_mb_col;
+ int *current_mb_col;
int mb_row;
VP8_COMMON *pc = &pbi->common;
const int nsync = pbi->sync_range;
@@ -289,6 +286,9 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
xd->up_available = (start_mb_row != 0);
+ xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row;
+ xd->mode_info_stride = pc->mode_info_stride;
+
for (mb_row = start_mb_row; mb_row < pc->mb_rows;
mb_row += (pbi->decoding_thread_count + 1)) {
int recon_yoffset, recon_uvoffset;
@@ -355,14 +355,15 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
xd->dst.uv_stride);
}
- for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) {
- *current_mb_col = mb_col - 1;
+ for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) {
+ if (((mb_col - 1) % nsync) == 0) {
+ pthread_mutex_t *mutex = &pbi->pmutex[mb_row];
+ protected_write(mutex, current_mb_col, mb_col - 1);
+ }
- if ((mb_col & (nsync - 1)) == 0) {
- while (mb_col > (*last_row_current_mb_col - nsync)) {
- x86_pause_hint();
- thread_sleep(0);
- }
+ if (mb_row && !(mb_col & (nsync - 1))) {
+ pthread_mutex_t *mutex = &pbi->pmutex[mb_row - 1];
+ sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
}
/* Distance of MB to the various image edges.
@@ -548,7 +549,7 @@ static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd,
}
/* last MB of row is ready just after extension is done */
- *current_mb_col = mb_col + nsync;
+ protected_write(&pbi->pmutex[mb_row], current_mb_col, mb_col + nsync);
++xd->mode_info_context; /* skip prediction column */
xd->up_available = 1;
@@ -568,10 +569,10 @@ static THREAD_FUNCTION thread_decoding_proc(void *p_data) {
ENTROPY_CONTEXT_PLANES mb_row_left_context;
while (1) {
- if (pbi->b_multithreaded_rd == 0) break;
+ if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) break;
if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) {
- if (pbi->b_multithreaded_rd == 0) {
+ if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd) == 0) {
break;
} else {
MACROBLOCKD *xd = &mbrd->mbd;
@@ -591,6 +592,7 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
pbi->b_multithreaded_rd = 0;
pbi->allocated_decoding_thread_count = 0;
+ pthread_mutex_init(&pbi->mt_mutex, NULL);
/* limit decoding threads to the max number of token partitions */
core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads;
@@ -647,6 +649,16 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) {
void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) {
int i;
+ /* De-allocate mutex */
+ if (pbi->pmutex != NULL) {
+ for (i = 0; i < mb_rows; ++i) {
+ pthread_mutex_destroy(&pbi->pmutex[i]);
+ }
+
+ vpx_free(pbi->pmutex);
+ pbi->pmutex = NULL;
+ }
+
vpx_free(pbi->mt_current_mb_col);
pbi->mt_current_mb_col = NULL;
@@ -712,7 +724,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
int i;
int uv_width;
- if (pbi->b_multithreaded_rd) {
+ if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {
vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
/* our internal buffers are always multiples of 16 */
@@ -730,6 +742,15 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
uv_width = width >> 1;
+ /* Allocate mutex */
+ CHECK_MEM_ERROR(pbi->pmutex,
+ vpx_malloc(sizeof(*pbi->pmutex) * pc->mb_rows));
+ if (pbi->pmutex) {
+ for (i = 0; i < pc->mb_rows; ++i) {
+ pthread_mutex_init(&pbi->pmutex[i], NULL);
+ }
+ }
+
/* Allocate an int for each mb row. */
CALLOC_ARRAY(pbi->mt_current_mb_col, pc->mb_rows);
@@ -772,9 +793,9 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
/* shutdown MB Decoding thread; */
- if (pbi->b_multithreaded_rd) {
+ if (protected_read(&pbi->mt_mutex, &pbi->b_multithreaded_rd)) {
int i;
- pbi->b_multithreaded_rd = 0;
+ protected_write(&pbi->mt_mutex, &pbi->b_multithreaded_rd, 0);
/* allow all threads to exit */
for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) {
@@ -804,6 +825,7 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) {
vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
}
+ pthread_mutex_destroy(&pbi->mt_mutex);
}
void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) {