summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/vp9_onyxc_int.h 2
-rw-r--r-- vp9/common/vp9_thread_common.c 157
-rw-r--r-- vp9/common/vp9_thread_common.h 19
-rw-r--r-- vp9/decoder/vp9_decodeframe.c 100
-rw-r--r-- vp9/decoder/vp9_decoder.h 3
-rw-r--r-- vp9/vp9_dx_iface.c 11
-rw-r--r-- vp9/vp9_dx_iface.h 1
7 files changed, 282 insertions, 11 deletions
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 45d3b0f82..c5c63e476 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -259,6 +259,8 @@ typedef struct VP9Common {
PARTITION_CONTEXT *above_seg_context;
ENTROPY_CONTEXT *above_context;
int above_context_alloc_cols;
+
+ int lf_row;
} VP9_COMMON;
static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) {
diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c
index d4b076645..36530fae6 100644
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -229,6 +229,28 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm,
workers, num_workers, lf_sync);
}
+// Per-frame setup for loop filtering that runs interleaved with tile decoding.
+//
+// Does nothing when frame_filter_level is 0. Otherwise lf_sync is freed and
+// re-allocated if it has never been set up (sync_range == 0), if the number of
+// superblock rows changed, or if more workers are requested than it was
+// allocated for. The per-row progress arrays, the corruption flag and the
+// shared row cursor cm->lf_row are then reset.
+void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level,
+                     int num_workers) {
+  const int num_sb_rows =
+      mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+  int must_realloc;
+
+  if (frame_filter_level == 0) return;
+
+  must_realloc = lf_sync->sync_range == 0 || lf_sync->rows != num_sb_rows ||
+                 lf_sync->num_workers < num_workers;
+  if (must_realloc) {
+    vp9_loop_filter_dealloc(lf_sync);
+    vp9_loop_filter_alloc(lf_sync, cm, num_sb_rows, cm->width, num_workers);
+  }
+
+  // Restart row hand-out from the top of the frame with a clean error state.
+  cm->lf_row = 0;
+  lf_sync->corrupted = 0;
+
+  // No tile has reported any row yet.
+  memset(lf_sync->num_tiles_done, 0,
+         num_sb_rows * sizeof(*lf_sync->num_tiles_done));
+
+  // Every byte is set to 0xFF, so each cur_sb_col entry reads back as -1.
+  memset(lf_sync->cur_sb_col, -1, num_sb_rows * sizeof(*lf_sync->cur_sb_col));
+}
+
// Set up nsync by width.
static INLINE int get_sync_range(int width) {
// nsync numbers are picked by testing. For example, for 4k
@@ -266,6 +288,25 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
pthread_cond_init(&lf_sync->cond[i], NULL);
}
}
+ pthread_mutex_init(&lf_sync->lf_mutex, NULL);
+
+ CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
+ vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
+ if (lf_sync->recon_done_mutex) {
+ int i;
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL);
+ }
+ }
+
+ CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond,
+ vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows));
+ if (lf_sync->recon_done_cond) {
+ int i;
+ for (i = 0; i < rows; ++i) {
+ pthread_cond_init(&lf_sync->recon_done_cond[i], NULL);
+ }
+ }
}
#endif // CONFIG_MULTITHREAD
@@ -276,6 +317,11 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
+ CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done,
+ vpx_malloc(sizeof(*lf_sync->num_tiles_done) *
+ mi_cols_aligned_to_sb(cm->mi_rows) >>
+ MI_BLOCK_SIZE_LOG2));
+
// Set up nsync.
lf_sync->sync_range = get_sync_range(width);
}
@@ -298,15 +344,126 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
}
vpx_free(lf_sync->cond);
}
+ if (lf_sync->recon_done_mutex != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
+ }
+ vpx_free(lf_sync->recon_done_mutex);
+ }
+
+ pthread_mutex_destroy(&lf_sync->lf_mutex);
+ if (lf_sync->recon_done_cond != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
+ }
+ vpx_free(lf_sync->recon_done_cond);
+ }
#endif // CONFIG_MULTITHREAD
+
vpx_free(lf_sync->lfdata);
vpx_free(lf_sync->cur_sb_col);
+ vpx_free(lf_sync->num_tiles_done);
// clear the structure as the source of this call may be a resize in which
// case this call will be followed by an _alloc() which may fail.
vp9_zero(*lf_sync);
}
}
+// Claims the next superblock row for loop filtering and returns its first mi
+// row, or -1 when there is nothing left to filter.
+//
+// cm->lf_row is the shared cursor in mi units; each successful call advances
+// it by MI_BLOCK_SIZE. With CONFIG_MULTITHREAD the cursor is read and updated
+// while holding lf_sync->lf_mutex.
+//
+// With CONFIG_MULTITHREAD the call then blocks until
+// lf_sync->num_tiles_done[] shows that all (1 << cm->log2_tile_cols) tile
+// columns have reported the row being waited on. That row is the one after
+// the claimed row, or the claimed row itself when it is the last one. After
+// the wait, -1 is returned instead of the row if lf_sync->corrupted is set.
+static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
+  int return_val = -1;
+  const int max_rows = cm->mi_rows;
+
+#if CONFIG_MULTITHREAD
+  int cur_row = 0;
+  const int tile_cols = 1 << cm->log2_tile_cols;
+
+  pthread_mutex_lock(&lf_sync->lf_mutex);
+  if (cm->lf_row < max_rows) {
+    cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
+    return_val = cm->lf_row;
+    cm->lf_row += MI_BLOCK_SIZE;
+    if (cm->lf_row < max_rows) {
+      /* If this is not the last row, make sure the next row is also decoded.
+       * This is because the intra predict has to happen before loop filter */
+      cur_row += 1;
+    }
+  }
+  pthread_mutex_unlock(&lf_sync->lf_mutex);
+
+  if (return_val == -1) return return_val;
+
+  pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]);
+  /* pthread_cond_wait() may return without a matching signal (spurious
+   * wakeup), so the predicate is re-tested in a loop rather than once. */
+  while (lf_sync->num_tiles_done[cur_row] < tile_cols) {
+    pthread_cond_wait(&lf_sync->recon_done_cond[cur_row],
+                      &lf_sync->recon_done_mutex[cur_row]);
+  }
+  pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]);
+
+  /* A tile worker may have flagged corruption while this thread waited. */
+  pthread_mutex_lock(&lf_sync->lf_mutex);
+  if (lf_sync->corrupted) {
+    return_val = -1;
+  }
+  pthread_mutex_unlock(&lf_sync->lf_mutex);
+#else
+  /* Single-threaded build: only the cursor is advanced, nothing is waited on. */
+  (void)lf_sync;
+  if (cm->lf_row < max_rows) {
+    return_val = cm->lf_row;
+    cm->lf_row += MI_BLOCK_SIZE;
+  }
+#endif  // CONFIG_MULTITHREAD
+
+  return return_val;
+}
+
+// Loop filters superblock rows one at a time for as long as get_next_row()
+// hands out a valid row. Each claimed row is written to lf_data->start /
+// lf_data->stop and then filtered with thread_loop_filter_rows().
+void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) {
+  VP9_COMMON *const cm = lf_data->cm;
+
+  for (;;) {
+    const int next_row = get_next_row(cm, lf_sync);
+
+    // -1 means no rows remain or the frame was flagged as corrupted.
+    if (next_row == -1 || next_row >= cm->mi_rows) break;
+
+    lf_data->start = next_row;
+    lf_data->stop = next_row + MI_BLOCK_SIZE;
+
+    thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+                            lf_data->start, lf_data->stop, lf_data->y_only,
+                            lf_sync);
+  }
+}
+
+// Reports that one tile has finished reconstructing superblock row `row`.
+//
+// `corrupted` is OR-ed into lf_sync->corrupted under lf_sync->lf_mutex. The
+// row's counter in lf_sync->num_tiles_done is then incremented under that
+// row's mutex; when it reaches num_tiles the row's condition variable is
+// signalled, or broadcast when is_last_row is set.
+// Without CONFIG_MULTITHREAD this function does nothing.
+void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
+                 int corrupted) {
+#if CONFIG_MULTITHREAD
+  pthread_mutex_t *const row_mutex = &lf_sync->recon_done_mutex[row];
+  pthread_cond_t *const row_cond = &lf_sync->recon_done_cond[row];
+
+  pthread_mutex_lock(&lf_sync->lf_mutex);
+  lf_sync->corrupted |= corrupted;
+  pthread_mutex_unlock(&lf_sync->lf_mutex);
+
+  pthread_mutex_lock(row_mutex);
+  ++lf_sync->num_tiles_done[row];
+  if (lf_sync->num_tiles_done[row] == num_tiles) {
+    if (!is_last_row) {
+      pthread_cond_signal(row_cond);
+    } else {
+      /* The last 2 rows wait on the last row to be done.
+       * So, we have to broadcast the signal in this case.
+       */
+      pthread_cond_broadcast(row_cond);
+    }
+  }
+  pthread_mutex_unlock(row_mutex);
+#else
+  (void)lf_sync;
+  (void)num_tiles;
+  (void)row;
+  (void)is_last_row;
+  (void)corrupted;
+#endif  // CONFIG_MULTITHREAD
+}
+
// Accumulate frame counts.
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
const FRAME_COUNTS *counts, int is_dec) {
diff --git a/vp9/common/vp9_thread_common.h b/vp9/common/vp9_thread_common.h
index f92df5bd6..b97e9ee13 100644
--- a/vp9/common/vp9_thread_common.h
+++ b/vp9/common/vp9_thread_common.h
@@ -37,6 +37,14 @@ typedef struct VP9LfSyncData {
// Row-based parallel loopfilter data
LFWorkerData *lfdata;
int num_workers;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t lf_mutex;
+ pthread_mutex_t *recon_done_mutex;
+ pthread_cond_t *recon_done_cond;
+#endif
+ int *num_tiles_done;
+ int corrupted;
} VP9LfSync;
// Allocate memory for loopfilter row synchronization.
@@ -53,6 +61,17 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm,
int partial_frame, VPxWorker *workers,
int num_workers, VP9LfSync *lf_sync);
+// Multi-threaded loopfilter initialisations
+// (Re)allocates lf_sync when required and resets its per-frame state and
+// cm->lf_row. Returns without doing anything when frame_filter_level is 0.
+void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm,
+ int frame_filter_level, int num_workers);
+
+// Loop filters superblock rows of lf_data->cm one at a time, stopping when no
+// rows remain or lf_sync has been flagged as corrupted.
+void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync);
+
+// Records that one tile has finished superblock row `row` and ORs `corrupted`
+// into lf_sync->corrupted. Waiters on the row are woken once num_tiles tiles
+// have reported it. Does nothing when CONFIG_MULTITHREAD is off.
+void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
+ int corrupted);
+
+// NOTE(review): no definition or caller of this function is visible in this
+// change - confirm it exists elsewhere or drop the declaration.
+void vp9_set_last_decoded_row(struct VP9Common *cm, int tile_col, int mi_row);
+
void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
const struct FRAME_COUNTS *counts, int is_dec);
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 48c49e2f5..95e376d04 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1451,6 +1451,25 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
return vpx_reader_find_end(&tile_data->bit_reader);
}
+// After a decode error, reports every row that will no longer be decoded as
+// done (with the corrupted flag set) via vp9_set_row().
+//
+// The first pass covers superblock rows from start_row (in mi units) to the
+// end of the frame. One further pass over all rows, starting at row 0, is
+// made for each of the num_tiles_left tiles.
+static void set_rows_after_error(VP9LfSync *lf_sync, int start_row, int mi_rows,
+                                 int num_tiles_left, int total_num_tiles) {
+  int first_row = start_row;
+  int tiles_remaining = num_tiles_left;
+
+  for (;;) {
+    const int last_sb_row =
+        (mi_cols_aligned_to_sb(mi_rows) >> MI_BLOCK_SIZE_LOG2) - 1;
+    int mi_row = first_row;
+
+    while (mi_row < mi_rows) {
+      const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
+      vp9_set_row(lf_sync, total_num_tiles, sb_row, last_sb_row == sb_row,
+                  /*corrupted=*/1);
+      mi_row += MI_BLOCK_SIZE;
+    }
+
+    /* If there are multiple tiles, the second tile should start marking row
+     * progress from row 0.
+     */
+    first_row = 0;
+
+    if (tiles_remaining-- == 0) break;
+  }
+}
+
// On entry 'tile_data->data_end' points to the end of the input frame, on exit
// it is updated to reflect the bitreader position of the final tile column if
// present in the tile buffer group or NULL otherwise.
@@ -1461,6 +1480,12 @@ static int tile_worker_hook(void *arg1, void *arg2) {
TileInfo *volatile tile = &tile_data->xd.tile;
const int final_col = (1 << pbi->common.log2_tile_cols) - 1;
const uint8_t *volatile bit_reader_end = NULL;
+ VP9_COMMON *cm = &pbi->common;
+
+ LFWorkerData *lf_data = tile_data->lf_data;
+ VP9LfSync *lf_sync = tile_data->lf_sync;
+
+ volatile int mi_row = 0;
volatile int n = tile_data->buf_start;
tile_data->error_info.setjmp = 1;
@@ -1468,14 +1493,26 @@ static int tile_worker_hook(void *arg1, void *arg2) {
tile_data->error_info.setjmp = 0;
tile_data->xd.corrupted = 1;
tile_data->data_end = NULL;
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ const int num_tiles_left = tile_data->buf_end - n;
+ const int mi_row_start = mi_row;
+ set_rows_after_error(lf_sync, mi_row_start, cm->mi_rows, num_tiles_left,
+ 1 << cm->log2_tile_cols);
+ }
return 0;
}
tile_data->xd.corrupted = 0;
do {
- int mi_row, mi_col;
+ int mi_col;
const TileBuffer *const buf = pbi->tile_buffers + n;
+
+ /* Initialize to 0 is safe since we do not deal with streams that have
+ * more than one row of tiles. (So tile->mi_row_start will be 0)
+ */
+ assert(cm->log2_tile_rows == 0);
+ mi_row = 0;
vp9_zero(tile_data->dqcoeff);
vp9_tile_init(tile, &pbi->common, 0, buf->col);
setup_token_decoder(buf->data, tile_data->data_end, buf->size,
@@ -1493,6 +1530,14 @@ static int tile_worker_hook(void *arg1, void *arg2) {
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
}
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+ const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
+ const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
+ vp9_set_row(lf_sync, 1 << cm->log2_tile_cols,
+ mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row,
+ tile_data->xd.corrupted);
+ }
}
if (buf->col == final_col) {
@@ -1500,6 +1545,21 @@ static int tile_worker_hook(void *arg1, void *arg2) {
}
} while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end);
+ if (pbi->lpf_mt_opt && n < tile_data->buf_end && cm->lf.filter_level &&
+ !cm->skip_loop_filter) {
+ /* This was not incremented in the tile loop, so increment before tiles left
+ * calculation
+ */
+ ++n;
+ set_rows_after_error(lf_sync, 0, cm->mi_rows, tile_data->buf_end - n,
+ 1 << cm->log2_tile_cols);
+ }
+
+ if (pbi->lpf_mt_opt && !tile_data->xd.corrupted && cm->lf.filter_level &&
+ !cm->skip_loop_filter) {
+ vp9_loopfilter_rows(lf_data, lf_sync);
+ }
+
tile_data->data_end = bit_reader_end;
return !tile_data->xd.corrupted;
}
@@ -1516,6 +1576,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data,
VP9_COMMON *const cm = &pbi->common;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
const uint8_t *bit_reader_end = NULL;
+ VP9LfSync *lf_row_sync = &pbi->lf_row_sync;
+ YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
@@ -1542,12 +1604,26 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data,
}
}
+ // Initialize LPF
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level,
+ pbi->num_tile_workers);
+ }
+
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
TileWorkerData *const tile_data =
&pbi->tile_worker_data[n + pbi->total_tiles];
winterface->sync(worker);
+
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ tile_data->lf_sync = lf_row_sync;
+ tile_data->lf_data = &tile_data->lf_sync->lfdata[n];
+ vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane);
+ tile_data->lf_data->y_only = 0;
+ }
+
tile_data->xd = pbi->mb;
tile_data->xd.counts =
cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
@@ -2069,17 +2145,19 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data,
if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
// Multi-threaded tile decoder
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
- if (!xd->corrupted) {
- if (!cm->skip_loop_filter) {
- // If multiple threads are used to decode tiles, then we use those
- // threads to do parallel loopfiltering.
- vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
- 0, 0, pbi->tile_workers, pbi->num_tile_workers,
- &pbi->lf_row_sync);
+ if (!pbi->lpf_mt_opt) {
+ if (!xd->corrupted) {
+ if (!cm->skip_loop_filter) {
+ // If multiple threads are used to decode tiles, then we use those
+ // threads to do parallel loopfiltering.
+ vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane,
+ cm->lf.filter_level, 0, 0, pbi->tile_workers,
+ pbi->num_tile_workers, &pbi->lf_row_sync);
+ }
+ } else {
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Decode failed. Frame data is corrupted.");
}
- } else {
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Decode failed. Frame data is corrupted.");
}
} else {
*p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 1c488961a..425c8964c 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -37,6 +37,8 @@ typedef struct TileWorkerData {
int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive
vpx_reader bit_reader;
FRAME_COUNTS counts;
+ LFWorkerData *lf_data;
+ VP9LfSync *lf_sync;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
@@ -74,6 +76,7 @@ typedef struct VP9Decoder {
int hold_ref_buf; // hold the reference buffer.
int row_mt;
+ int lpf_mt_opt;
} VP9Decoder;
int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size,
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index fdff87768..6a4cb9acf 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -270,6 +270,9 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
RANGE_CHECK(ctx, row_mt, 0, 1);
ctx->pbi->row_mt = ctx->row_mt;
+ RANGE_CHECK(ctx, lpf_opt, 0, 1);
+ ctx->pbi->lpf_mt_opt = ctx->lpf_opt;
+
// If postprocessing was enabled by the application and a
// configuration has not been provided, default it.
if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
@@ -658,6 +661,13 @@ static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK;
}
+// Control handler registered for VP9D_SET_LOOP_FILTER_OPT. Reads one int from
+// args and stores it in ctx->lpf_opt; the value is not validated here.
+// Always returns VPX_CODEC_OK.
+static vpx_codec_err_t ctrl_enable_lpf_opt(vpx_codec_alg_priv_t *ctx,
+                                           va_list args) {
+  const int requested = va_arg(args, int);
+
+  ctx->lpf_opt = requested;
+  return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP8_COPY_REFERENCE, ctrl_copy_reference },
@@ -670,6 +680,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter },
{ VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc },
{ VP9D_SET_ROW_MT, ctrl_set_row_mt },
+ { VP9D_SET_LOOP_FILTER_OPT, ctrl_enable_lpf_opt },
// Getters
{ VPXD_GET_LAST_QUANTIZER, ctrl_get_quantizer },
diff --git a/vp9/vp9_dx_iface.h b/vp9/vp9_dx_iface.h
index a1c335278..f60688c4d 100644
--- a/vp9/vp9_dx_iface.h
+++ b/vp9/vp9_dx_iface.h
@@ -46,6 +46,7 @@ struct vpx_codec_alg_priv {
int svc_decoding;
int svc_spatial_layer;
int row_mt;
+ int lpf_opt;
};
#endif // VPX_VP9_VP9_DX_IFACE_H_