author    Yunqing Wang <yunqingwang@google.com>    2016-01-06 18:27:37 -0800
committer Yunqing Wang <yunqingwang@google.com>    2016-01-08 11:59:49 -0800
commit    ff0107f60dbddec82d7d7feacd1c9a3ff9ab4a51 (patch)
tree      0590f67ac2b1e9266a6b0cf4fbc144d3f9f69b00 /vp8/encoder
parent    b8c2a4eb0c47b633096f5c428b70607e7bf8d570 (diff)
Amend and improve VP8 multithreading implementation
There are flaws in the current implementation of the VP8 multithreaded encoder and decoder, as reported in the following issue: https://code.google.com/p/chromium/issues/detail?id=158922

Although the data race warnings are harmless and would not cause real problems while encoding or decoding videos, it is better to fix them so that the VP8 code can pass the TSan test.

To synchronize thread-shared data access while maintaining speed (i.e. decoding speed), use multiple mutexes based on mb_rows to reduce the number of synchronizations needed, protect the reads and writes of the shared data, and reduce the number of mb_col writes by a factor of nsync. Decoder speed tests showed less than 3% speed loss while using 2 to 4 threads.

Change-Id: Ie296defffcd86a693188b668270d811964227882
Diffstat (limited to 'vp8/encoder')
-rw-r--r--  vp8/encoder/encodeframe.c  24
-rw-r--r--  vp8/encoder/ethreading.c   54
-rw-r--r--  vp8/encoder/onyx_if.c      34
-rw-r--r--  vp8/encoder/onyx_int.h      2
4 files changed, 79 insertions, 35 deletions
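
The encoder-side hunks below replace direct volatile reads and writes of the per-row mt_current_mb_col counters with calls to protected_read(), protected_write() and sync_read(), each taking the per-row mutex from the new cpi->pmutex array (b_multi_threaded is guarded by the separate cpi->mt_mutex). Those helpers are defined outside vp8/encoder, so they do not appear in this diff; what follows is only a minimal sketch of what they plausibly look like, with sched_yield() standing in for the x86_pause_hint()/thread_sleep(0) pair used in the removed spin loops.

#include <pthread.h>
#include <sched.h>

/* Read a value shared between encoding threads under its mutex. */
static int protected_read(pthread_mutex_t *mutex, const int *value) {
  int v;
  pthread_mutex_lock(mutex);
  v = *value;
  pthread_mutex_unlock(mutex);
  return v;
}

/* Publish a new value (e.g. the last finished mb_col of a row) under its mutex. */
static void protected_write(pthread_mutex_t *mutex, int *value, int new_value) {
  pthread_mutex_lock(mutex);
  *value = new_value;
  pthread_mutex_unlock(mutex);
}

/* Spin until the row above has advanced at least nsync macroblocks past mb_col. */
static void sync_read(pthread_mutex_t *mutex, int mb_col,
                      const int *last_row_current_mb_col, int nsync) {
  while (mb_col > protected_read(mutex, last_row_current_mb_col) - nsync) {
    sched_yield(); /* libvpx uses x86_pause_hint() and thread_sleep(0) here */
  }
}

With one mutex per macroblock row and writes issued only every nsync columns, the lock traffic stays low enough that the wavefront synchronization keeps its speed while making the shared accesses visible to TSan.
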
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index b0aaa2f0b..9b05cd1fc 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -386,8 +386,8 @@ void encode_mb_row(VP8_COMP *cpi,
#if CONFIG_MULTITHREAD
const int nsync = cpi->mt_sync_range;
const int rightmost_col = cm->mb_cols + nsync;
- volatile const int *last_row_current_mb_col;
- volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
+ const int *last_row_current_mb_col;
+ int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
@@ -461,17 +461,15 @@ void encode_mb_row(VP8_COMP *cpi,
vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);
#if CONFIG_MULTITHREAD
- if (cpi->b_multi_threaded != 0)
- {
- *current_mb_col = mb_col - 1; /* set previous MB done */
+ if (cpi->b_multi_threaded != 0) {
+ if (((mb_col - 1) % nsync) == 0) {
+ pthread_mutex_t *mutex = &cpi->pmutex[mb_row];
+ protected_write(mutex, current_mb_col, mb_col - 1);
+ }
- if ((mb_col & (nsync - 1)) == 0)
- {
- while (mb_col > (*last_row_current_mb_col - nsync))
- {
- x86_pause_hint();
- thread_sleep(0);
- }
+ if (mb_row && !(mb_col & (nsync - 1))) {
+ pthread_mutex_t *mutex = &cpi->pmutex[mb_row-1];
+ sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
}
}
#endif
@@ -616,7 +614,7 @@ void encode_mb_row(VP8_COMP *cpi,
#if CONFIG_MULTITHREAD
if (cpi->b_multi_threaded != 0)
- *current_mb_col = rightmost_col;
+ protected_write(&cpi->pmutex[mb_row], current_mb_col, rightmost_col);
#endif
/* this is to account for the border */
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 4e234ccd5..4f689c4bc 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -26,12 +26,13 @@ static THREAD_FUNCTION thread_loopfilter(void *p_data)
while (1)
{
- if (cpi->b_multi_threaded == 0)
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0)
break;
if (sem_wait(&cpi->h_event_start_lpf) == 0)
{
- if (cpi->b_multi_threaded == 0) /* we're shutting down */
+ /* we're shutting down */
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0)
break;
vp8_loopfilter_frame(cpi, cm);
@@ -53,7 +54,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
while (1)
{
- if (cpi->b_multi_threaded == 0)
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0)
break;
if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0)
@@ -72,9 +73,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int *segment_counts = mbri->segment_counts;
int *totalrate = &mbri->totalrate;
- if (cpi->b_multi_threaded == 0) /* we're shutting down */
+ /* we're shutting down */
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded) == 0)
break;
+ xd->mode_info_context = cm->mi + cm->mode_info_stride *
+ (ithread + 1);
+ xd->mode_info_stride = cm->mode_info_stride;
+
for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
{
@@ -85,8 +91,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
int map_index = (mb_row * cm->mb_cols);
- volatile const int *last_row_current_mb_col;
- volatile int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
+ const int *last_row_current_mb_col;
+ int *current_mb_col = &cpi->mt_current_mb_col[mb_row];
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];
@@ -113,15 +119,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
/* for each macroblock col in image */
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
- *current_mb_col = mb_col - 1;
+ if (((mb_col - 1) % nsync) == 0) {
+ pthread_mutex_t *mutex = &cpi->pmutex[mb_row];
+ protected_write(mutex, current_mb_col, mb_col - 1);
+ }
- if ((mb_col & (nsync - 1)) == 0)
- {
- while (mb_col > (*last_row_current_mb_col - nsync))
- {
- x86_pause_hint();
- thread_sleep(0);
- }
+ if (mb_row && !(mb_col & (nsync - 1))) {
+ pthread_mutex_t *mutex = &cpi->pmutex[mb_row-1];
+ sync_read(mutex, mb_col, last_row_current_mb_col, nsync);
}
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
@@ -296,7 +301,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
xd->dst.u_buffer + 8,
xd->dst.v_buffer + 8);
- *current_mb_col = mb_col + nsync;
+ protected_write(&cpi->pmutex[mb_row], current_mb_col,
+ mb_col + nsync);
/* this is to account for the border */
xd->mode_info_context++;
@@ -473,9 +479,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);
- mbd->mode_info_context = cm->mi + x->e_mbd.mode_info_stride * (i + 1);
- mbd->mode_info_stride = cm->mode_info_stride;
-
mbd->frame_type = cm->frame_type;
mb->src = * cpi->Source;
@@ -517,6 +520,8 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
cpi->encoding_thread_count = 0;
cpi->b_lpf_running = 0;
+ pthread_mutex_init(&cpi->mt_mutex, NULL);
+
if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
{
int ithread;
@@ -580,7 +585,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
if(rc)
{
/* shutdown other threads */
- cpi->b_multi_threaded = 0;
+ protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
for(--ithread; ithread >= 0; ithread--)
{
pthread_join(cpi->h_encoding_thread[ithread], 0);
@@ -594,6 +599,8 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
vpx_free(cpi->mb_row_ei);
vpx_free(cpi->en_thread_data);
+ pthread_mutex_destroy(&cpi->mt_mutex);
+
return -1;
}
@@ -611,7 +618,7 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
if(rc)
{
/* shutdown other threads */
- cpi->b_multi_threaded = 0;
+ protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
for(--ithread; ithread >= 0; ithread--)
{
sem_post(&cpi->h_event_start_encoding[ithread]);
@@ -628,6 +635,8 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
vpx_free(cpi->mb_row_ei);
vpx_free(cpi->en_thread_data);
+ pthread_mutex_destroy(&cpi->mt_mutex);
+
return -2;
}
}
@@ -637,10 +646,10 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
{
- if (cpi->b_multi_threaded)
+ if (protected_read(&cpi->mt_mutex, &cpi->b_multi_threaded))
{
/* shutdown other threads */
- cpi->b_multi_threaded = 0;
+ protected_write(&cpi->mt_mutex, &cpi->b_multi_threaded, 0);
{
int i;
@@ -666,5 +675,6 @@ void vp8cx_remove_encoder_threads(VP8_COMP *cpi)
vpx_free(cpi->mb_row_ei);
vpx_free(cpi->en_thread_data);
}
+ pthread_mutex_destroy(&cpi->mt_mutex);
}
#endif
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index df5bcf688..5a4b37dcf 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -477,6 +477,18 @@ static void dealloc_compressor_data(VP8_COMP *cpi)
cpi->mb.pip = 0;
#if CONFIG_MULTITHREAD
+ /* De-allocate mutex */
+ if (cpi->pmutex != NULL) {
+ VP8_COMMON *const pc = &cpi->common;
+ int i;
+
+ for (i = 0; i < pc->mb_rows; i++) {
+ pthread_mutex_destroy(&cpi->pmutex[i]);
+ }
+ vpx_free(cpi->pmutex);
+ cpi->pmutex = NULL;
+ }
+
vpx_free(cpi->mt_current_mb_col);
cpi->mt_current_mb_col = NULL;
#endif
@@ -1180,6 +1192,9 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
int width = cm->Width;
int height = cm->Height;
+#if CONFIG_MULTITHREAD
+ int prev_mb_rows = cm->mb_rows;
+#endif
if (vp8_alloc_frame_buffers(cm, width, height))
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
@@ -1271,6 +1286,25 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
if (cpi->oxcf.multi_threaded > 1)
{
+ int i;
+
+ /* De-allocate and re-allocate mutex */
+ if (cpi->pmutex != NULL) {
+ for (i = 0; i < prev_mb_rows; i++) {
+ pthread_mutex_destroy(&cpi->pmutex[i]);
+ }
+ vpx_free(cpi->pmutex);
+ cpi->pmutex = NULL;
+ }
+
+ CHECK_MEM_ERROR(cpi->pmutex, vpx_malloc(sizeof(*cpi->pmutex) *
+ cm->mb_rows));
+ if (cpi->pmutex) {
+ for (i = 0; i < cm->mb_rows; i++) {
+ pthread_mutex_init(&cpi->pmutex[i], NULL);
+ }
+ }
+
vpx_free(cpi->mt_current_mb_col);
CHECK_MEM_ERROR(cpi->mt_current_mb_col,
vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 317e4b9e4..2b2f7a0a9 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -530,6 +530,8 @@ typedef struct VP8_COMP
#if CONFIG_MULTITHREAD
/* multithread data */
+ pthread_mutex_t *pmutex;
+ pthread_mutex_t mt_mutex; /* mutex for b_multi_threaded */
int * mt_current_mb_col;
int mt_sync_range;
int b_multi_threaded;