summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Yunqing Wang <yunqingwang@google.com> 2017-02-15 00:54:10 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> 2017-02-15 00:54:10 +0000
commit: f2c1aea1180b43a1ab7642d504a84fabf1b147b8 (patch)
tree: b60bb38f9f046a0b3a9636183246f4ea498478d1
parent: 615566aa81327767f89543927048dbbab1156e6d (diff)
parent: 71061e9332c05324007e7f6c900285273793366d (diff)
download: libvpx-f2c1aea1180b43a1ab7642d504a84fabf1b147b8.tar
libvpx-f2c1aea1180b43a1ab7642d504a84fabf1b147b8.tar.gz
libvpx-f2c1aea1180b43a1ab7642d504a84fabf1b147b8.tar.bz2
libvpx-f2c1aea1180b43a1ab7642d504a84fabf1b147b8.zip
Merge "Row based multi-threading of encoding stage"
-rw-r--r-- vp9/encoder/vp9_bitstream.c | 5
-rw-r--r-- vp9/encoder/vp9_block.h | 5
-rw-r--r-- vp9/encoder/vp9_encodeframe.c | 48
-rw-r--r-- vp9/encoder/vp9_encodeframe.h | 3
-rw-r--r-- vp9/encoder/vp9_encoder.c | 26
-rw-r--r-- vp9/encoder/vp9_encoder.h | 6
-rw-r--r-- vp9/encoder/vp9_ethread.c | 99
-rw-r--r-- vp9/encoder/vp9_ethread.h | 2
-rw-r--r-- vp9/encoder/vp9_mcomp.c | 45
-rw-r--r-- vp9/encoder/vp9_multi_thread.c | 40
-rw-r--r-- vp9/encoder/vp9_pickmode.c | 10
-rw-r--r-- vp9/encoder/vp9_rd.c | 14
-rw-r--r-- vp9/encoder/vp9_rd.h | 25
-rw-r--r-- vp9/encoder/vp9_rdopt.c | 48
-rw-r--r-- vp9/encoder/vp9_speed_features.c | 18
-rw-r--r-- vp9/vp9_cx_iface.c | 3
16 files changed, 357 insertions, 40 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 1f2d88c34..8433f4edd 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -921,10 +921,11 @@ int vp9_get_refresh_mask(VP9_COMP *cpi) {
static int encode_tile_worker(VP9_COMP *cpi, VP9BitstreamWorkerData *data) {
MACROBLOCKD *const xd = &data->xd;
+ const int tile_row = 0;  // Names the literal 0 formerly passed to write_modes() below.
vpx_start_encode(&data->bit_writer, data->dest);
write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
- &data->bit_writer, 0, data->tile_idx, &data->max_mv_magnitude,
- data->interp_filter_selected);
+ &data->bit_writer, tile_row, data->tile_idx,
+ &data->max_mv_magnitude, data->interp_filter_selected);
vpx_stop_encode(&data->bit_writer);
return 1;
}
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 91d07e3a0..c0c69f6b5 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -11,6 +11,8 @@
#ifndef VP9_ENCODER_VP9_BLOCK_H_
#define VP9_ENCODER_VP9_BLOCK_H_
+#include "vpx_util/vpx_thread.h"
+
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropy.h"
@@ -88,6 +90,9 @@ struct macroblock {
int mb_energy;
int *m_search_count_ptr;
int *ex_search_count_ptr;
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *search_count_mutex;
+#endif
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 1bbdeece5..215f8b8f6 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3095,13 +3095,18 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
const int mi_col_start = tile_info->mi_col_start;
const int mi_col_end = tile_info->mi_col_end;
int mi_col;
+ const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
+ const int num_sb_cols =
+ get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2);
+ int sb_col_in_tile;
// Initialize the left context for the new SB row
memset(&xd->left_context, 0, sizeof(xd->left_context));
memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
- for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) {
+ for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end;
+ mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) {
const struct segmentation *const seg = &cm->seg;
int dummy_rate;
int64_t dummy_dist;
@@ -3112,6 +3117,9 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO **mi = cm->mi_grid_visible + idx_str;
+ (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
+ sb_col_in_tile - 1);
+
if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
@@ -3163,6 +3171,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rdc, INT64_MAX, td->pc_root);
}
+ (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
+ sb_col_in_tile, num_sb_cols);
}
}
@@ -4109,13 +4119,17 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
tile_data->mode_map[i][j] = j;
}
}
+#if CONFIG_MULTITHREAD
+ tile_data->search_count_mutex = NULL;
+ tile_data->enc_row_mt_mutex = NULL;
+#endif
}
}
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- TileInfo *tile_info =
- &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
+ TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+ TileInfo *tile_info = &this_tile->tile_info;
vp9_tile_init(tile_info, cm, tile_row, tile_col);
cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
@@ -4125,6 +4139,10 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
tplist = cpi->tplist[tile_row][tile_col];
tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
+
+ // Reset the per-tile motion search counters.
+ this_tile->m_search_count = 0; // Count of motion search hits.
+ this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
}
}
}
@@ -4170,10 +4188,11 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
int mi_row;
// Set up pointers to per thread motion search counters.
- this_tile->m_search_count = 0; // Count of motion search hits.
- this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
td->mb.m_search_count_ptr = &this_tile->m_search_count;
td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
+#if CONFIG_MULTITHREAD
+ td->mb.search_count_mutex = this_tile->search_count_mutex;
+#endif
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
@@ -4289,11 +4308,20 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
#endif
- // If allowed, encoding tiles in parallel with one thread handling one tile.
- if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
- vp9_encode_tiles_mt(cpi);
- else
- encode_tiles(cpi);
+ if (!cpi->new_mt) {
+ cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
+ cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
+ // If allowed, encoding tiles in parallel with one thread handling one
+ // tile when row based multi-threading is disabled.
+ if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
+ vp9_encode_tiles_mt(cpi);
+ else
+ encode_tiles(cpi);
+ } else {
+ cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
+ cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
+ vp9_encode_tiles_row_mt(cpi);
+ }
vpx_usec_timer_mark(&emr_timer);
cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h
index aa5494785..2b9b65dcd 100644
--- a/vp9/encoder/vp9_encodeframe.h
+++ b/vp9/encoder/vp9_encodeframe.h
@@ -39,6 +39,9 @@ void vp9_init_tile_data(struct VP9_COMP *cpi);
void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row,
int tile_col);
+void vp9_encode_sb_row(struct VP9_COMP *cpi, struct ThreadData *td,
+ int tile_row, int tile_col, int mi_row);
+
void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q);
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 50fa8c682..2ce46c657 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -1575,17 +1575,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
highbd_set_var_fns(cpi);
#endif
- // Enable multi-threading for first pass.
- cpi->new_mt = 0;
- if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
- cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
- cpi->oxcf.new_mt && !cpi->use_svc)
- cpi->new_mt = 1;
-
- if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
- (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.new_mt &&
- !cpi->use_svc)
- cpi->new_mt = 1;
+ vp9_set_new_mt(cpi);
}
#ifndef M_LOG2_E
@@ -5213,3 +5203,17 @@ void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) {
vp9_update_entropy(cpi, 0);
}
}
+
+void vp9_set_new_mt(VP9_COMP *cpi) {
+ /* Recompute cpi->new_mt, the flag that enables row based multi-threading
+  * for the supported modes of encoding.
+  *
+  * The flag is cleared first and then set to 1 only when oxcf.new_mt is
+  * requested, SVC is not in use (!cpi->use_svc), oxcf.speed < 5, and either
+  *   - oxcf.pass == 1 with oxcf.mode GOOD or BEST, or
+  *   - oxcf.pass == 0 or 2 with oxcf.mode GOOD.
+  */
+ cpi->new_mt = 0;
+ if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) &&
+ cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) &&
+ cpi->oxcf.new_mt && !cpi->use_svc)
+ cpi->new_mt = 1;  // pass == 1 case
+
+ if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 &&
+ (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.new_mt &&
+ !cpi->use_svc)
+ cpi->new_mt = 1;  // pass == 0 or pass == 2 case
+}
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 65f3f86de..675512618 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -276,6 +276,10 @@ typedef struct TileDataEnc {
int ex_search_count;
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *search_count_mutex;
+ pthread_mutex_t *enc_row_mt_mutex;
+#endif
} TileDataEnc;
typedef struct RowMTInfo {
@@ -897,6 +901,8 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
+void vp9_set_new_mt(VP9_COMP *cpi);
+
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c
index 1bffc4030..bf8108416 100644
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -341,7 +341,7 @@ void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c,
#if CONFIG_MULTITHREAD
const int nsync = row_mt_sync->sync_range;
int cur;
- // Only signal when there are enough filtered SB for next row to run.
+ // Only signal when there are enough encoded blocks for next row to run.
int sig = 1;
if (c < cols - 1) {
@@ -542,3 +542,100 @@ void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook,
multi_thread_ctxt, num_workers);
}
+/* Worker hook for row based multi-threaded encoding.
+ *
+ * Starting from the tile that multi_thread_ctxt->thread_id_to_tile_id maps
+ * this thread to, the worker repeatedly asks vp9_enc_grp_get_next_job() for a
+ * job and encodes the row that job names with vp9_encode_sb_row(). When no
+ * job is returned it calls vp9_get_tiles_proc_status(); a non-zero result
+ * ends the loop. That call is handed &cur_tile_id (NOTE(review): presumably
+ * so it can move the worker on to another tile -- confirm against its
+ * definition).
+ *
+ * thread_data:       this worker's state; cpi, thread_id, td and
+ *                    tile_completion_status are read from it.
+ * multi_thread_ctxt: context passed to the job and tile-status calls.
+ *
+ * Always returns 0.
+ */
+static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
+ MultiThreadHandle *multi_thread_ctxt) {
+ VP9_COMP *const cpi = thread_data->cpi;
+ const VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ int tile_row, tile_col;
+ TileDataEnc *this_tile;
+ int end_of_frame;
+ int thread_id = thread_data->thread_id;
+ int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
+ JobNode *proc_job = NULL;
+ int mi_row;
+
+ end_of_frame = 0;
+ while (0 == end_of_frame) {
+ // Get the next job in the queue
+ proc_job =
+ (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id);
+ if (NULL == proc_job) {
+ // Query for the status of other tiles
+ end_of_frame = vp9_get_tiles_proc_status(
+ multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id,
+ tile_cols);
+ } else {
+ tile_col = proc_job->tile_col_id;
+ tile_row = proc_job->tile_row_id;
+ mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;  // vertical-unit row index scaled to an mi row
+
+ this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+ thread_data->td->mb.m_search_count_ptr = &this_tile->m_search_count;  // counters live in the tile, not the thread
+ thread_data->td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
+#if CONFIG_MULTITHREAD
+ thread_data->td->mb.search_count_mutex = this_tile->search_count_mutex;  // the tile's mutex for those counters
+#endif
+
+ vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
+ }
+ }
+ return 0;
+}
+/* Encode the tiles of the current frame using row based multi-threading.
+ *
+ * Steps, in order:
+ *  - re-initialise the tile data with vp9_init_tile_data(); when the
+ *    recorded allocation (allocated_tile_cols, allocated_tile_rows,
+ *    allocated_vert_unit_rows) is smaller than tile_cols, tile_rows or
+ *    cm->mb_rows, the row-MT memory is freed before and reallocated after
+ *    that call;
+ *  - call create_enc_workers() with VPXMAX(oxcf.max_threads, 1), assign
+ *    tiles to threads, prepare the ENCODE_JOB queue and run
+ *    vp9_multi_thread_tile_init();
+ *  - copy mb, rd_counts and the frame counts from cpi into each worker's
+ *    thread data (skipped where the worker already points at cpi's own);
+ *  - launch the workers with enc_row_mt_worker_hook;
+ *  - accumulate each worker's frame counts into cm->counts and its rd-opt
+ *    data into cpi->td, for every worker except the one at index
+ *    cpi->num_workers - 1 (NOTE(review): presumably that worker operates on
+ *    cpi->td directly -- confirm in create_enc_workers()).
+ */
+void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
+ int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);  // never fewer than one worker
+ int i;
+
+ if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
+ multi_thread_ctxt->allocated_tile_rows < tile_rows ||
+ multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {  // current row-MT allocation is too small
+ vp9_row_mt_mem_dealloc(cpi);
+ vp9_init_tile_data(cpi);
+ vp9_row_mt_mem_alloc(cpi);
+ } else {
+ vp9_init_tile_data(cpi);
+ }
+
+ create_enc_workers(cpi, num_workers);
+
+ vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);
+
+ vp9_prepare_job_queue(cpi, ENCODE_JOB);
+
+ vp9_multi_thread_tile_init(cpi);
+
+ for (i = 0; i < num_workers; i++) {
+ EncWorkerData *thread_data;
+ thread_data = &cpi->tile_thr_data[i];
+
+ // Before encoding a frame, copy the thread data from cpi.
+ if (thread_data->td != &cpi->td) {
+ thread_data->td->mb = cpi->td.mb;
+ thread_data->td->rd_counts = cpi->td.rd_counts;
+ }
+ if (thread_data->td->counts != &cpi->common.counts) {
+ memcpy(thread_data->td->counts, &cpi->common.counts,
+ sizeof(cpi->common.counts));
+ }
+ }
+
+ launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
+ multi_thread_ctxt, num_workers);
+
+ for (i = 0; i < num_workers; i++) {
+ VPxWorker *const worker = &cpi->workers[i];
+ EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
+
+ // Accumulate counters.
+ if (i < cpi->num_workers - 1) {  // the worker at the last index is not accumulated
+ vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
+ accumulate_rd_opt(&cpi->td, thread_data->td);
+ }
+ }
+}
diff --git a/vp9/encoder/vp9_ethread.h b/vp9/encoder/vp9_ethread.h
index 908bb6ff6..a396e621d 100644
--- a/vp9/encoder/vp9_ethread.h
+++ b/vp9/encoder/vp9_ethread.h
@@ -44,6 +44,8 @@ typedef struct VP9RowMTSyncData {
void vp9_encode_tiles_mt(struct VP9_COMP *cpi);
+void vp9_encode_tiles_row_mt(struct VP9_COMP *cpi);
+
void vp9_encode_fp_row_mt(struct VP9_COMP *cpi);
void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c);
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index fa8b2ff0e..16426b28e 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1992,9 +1992,18 @@ static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
int range = sf->mesh_patterns[0].range;
int baseline_interval_divisor;
+#if CONFIG_MULTITHREAD
+ if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
+#endif
+
// Keep track of number of exhaustive calls (this frame in this thread).
++(*x->ex_search_count_ptr);
+#if CONFIG_MULTITHREAD
+ if (NULL != x->search_count_mutex)
+ pthread_mutex_unlock(x->search_count_mutex);
+#endif
+
// Trap illegal values for interval and range for this function.
if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
(interval > range))
@@ -2355,13 +2364,27 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
#define MIN_EX_SEARCH_LIMIT 128
static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
const SPEED_FEATURES *const sf = &cpi->sf;
- const int max_ex =
- VPXMAX(MIN_EX_SEARCH_LIMIT,
- (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
+ int is_exhaustive_allowed;  // result, computed while the mutex is held; note it shares the function's name
+ int max_ex;  // cap on exhaustive searches, computed below
+
+#if CONFIG_MULTITHREAD
+ if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);  // both counters are read under the mutex when one is set
+#endif
+
+ max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT,
+ (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);  // max_exaustive_pct percent of the motion search count, floored at MIN_EX_SEARCH_LIMIT
- return sf->allow_exhaustive_searches &&
- (sf->exhaustive_searches_thresh < INT_MAX) &&
- (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref;
+ is_exhaustive_allowed = sf->allow_exhaustive_searches &&
+ (sf->exhaustive_searches_thresh < INT_MAX) &&
+ (*x->ex_search_count_ptr <= max_ex) &&
+ !cpi->rc.is_src_frame_alt_ref;
+
+#if CONFIG_MULTITHREAD
+ if (NULL != x->search_count_mutex)
+ pthread_mutex_unlock(x->search_count_mutex);
+#endif
+
+ return is_exhaustive_allowed;
}
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
@@ -2406,9 +2429,19 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
cost_list, fn_ptr, ref_mv, tmp_mv);
+#if CONFIG_MULTITHREAD
+ if (NULL != x->search_count_mutex)
+ pthread_mutex_lock(x->search_count_mutex);
+#endif
+
// Keep track of number of searches (this frame in this thread).
++(*x->m_search_count_ptr);
+#if CONFIG_MULTITHREAD
+ if (NULL != x->search_count_mutex)
+ pthread_mutex_unlock(x->search_count_mutex);
+#endif
+
// Should we allow a follow on exhaustive search?
if (is_exhaustive_allowed(cpi, x)) {
int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
diff --git a/vp9/encoder/vp9_multi_thread.c b/vp9/encoder/vp9_multi_thread.c
index 23b0b4276..e27b1ed3a 100644
--- a/vp9/encoder/vp9_multi_thread.c
+++ b/vp9/encoder/vp9_multi_thread.c
@@ -100,11 +100,32 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
multi_thread_ctxt->num_tile_vert_sbs[tile_row] =
get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
}
+
+#if CONFIG_MULTITHREAD
+ for (tile_row = 0; tile_row < tile_rows; tile_row++) {
+ for (tile_col = 0; tile_col < tile_cols; tile_col++) {
+ TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+
+ CHECK_MEM_ERROR(cm, this_tile->search_count_mutex,
+ vpx_malloc(sizeof(*this_tile->search_count_mutex)));
+
+ pthread_mutex_init(this_tile->search_count_mutex, NULL);
+
+ CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex,
+ vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex)));
+
+ pthread_mutex_init(this_tile->enc_row_mt_mutex, NULL);
+ }
+ }
+#endif
}
void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
int tile_col;
+#if CONFIG_MULTITHREAD
+ int tile_row;
+#endif
// Deallocate memory for job queue
if (multi_thread_ctxt->job_queue) vpx_free(multi_thread_ctxt->job_queue);
@@ -124,6 +145,25 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
TileDataEnc *this_tile = &cpi->tile_data[tile_col];
vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync);
}
+
+#if CONFIG_MULTITHREAD
+ for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows;
+ tile_row++) {
+ for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols;
+ tile_col++) {
+ TileDataEnc *this_tile =
+ &cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols +
+ tile_col];
+ pthread_mutex_destroy(this_tile->search_count_mutex);
+ vpx_free(this_tile->search_count_mutex);
+ this_tile->search_count_mutex = NULL;
+
+ pthread_mutex_destroy(this_tile->enc_row_mt_mutex);
+ vpx_free(this_tile->enc_row_mt_mutex);
+ this_tile->enc_row_mt_mutex = NULL;
+ }
+ }
+#endif
}
void vp9_multi_thread_tile_init(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index cff8a3fa9..9f2e93adc 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1657,7 +1657,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
mode_rd_thresh = mode_rd_thresh << 3;
if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
- rd_thresh_freq_fact[mode_index]))
+#if CONFIG_MULTITHREAD
+ tile_data->enc_row_mt_mutex,
+#endif
+ &rd_thresh_freq_fact[mode_index]))
continue;
if (this_mode == NEWMV) {
@@ -2018,7 +2021,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
continue;
if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
- rd_thresh_freq_fact[mode_index]))
+#if CONFIG_MULTITHREAD
+ tile_data->enc_row_mt_mutex,
+#endif
+ &rd_thresh_freq_fact[mode_index]))
continue;
mi->mode = this_mode;
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 3bbfa1aac..21e3b1f63 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -610,7 +610,15 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
}
void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
- int bsize, int best_mode_index) {
+ int bsize,
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *enc_row_mt_mutex,
+#endif
+ int best_mode_index) {
+#if CONFIG_MULTITHREAD
+ if (NULL != enc_row_mt_mutex) pthread_mutex_lock(enc_row_mt_mutex);
+#endif
+
if (rd_thresh > 0) {
const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
int mode;
@@ -628,6 +636,10 @@ void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
}
}
}
+
+#if CONFIG_MULTITHREAD
+ if (NULL != enc_row_mt_mutex) pthread_mutex_unlock(enc_row_mt_mutex);
+#endif
}
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index 1c6831358..74a2f5d95 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -164,11 +164,32 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);
void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize,
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *enc_row_mt_mutex,
+#endif
int best_mode_index);
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
- int thresh_fact) {
- return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *enc_row_mt_mutex,  // may be NULL, in which case no locking is done
+#endif
+ const int *const thresh_fact) {  // now a pointer: the factor is dereferenced while the mutex is held
+ int is_rd_less_than_thresh;
+
+#if CONFIG_MULTITHREAD
+ // Synchronize to ensure data coherency: thresh_freq_fact is maintained at
+ // tile level and is not thread-safe with row based multi-threading.
+ if (NULL != enc_row_mt_mutex) pthread_mutex_lock(enc_row_mt_mutex);
+#endif
+
+ is_rd_less_than_thresh =
+ best_rd < ((int64_t)thresh * (*thresh_fact) >> 5) || thresh == INT_MAX;  // non-zero if best_rd is below (thresh * *thresh_fact) >> 5, or thresh is INT_MAX
+
+#if CONFIG_MULTITHREAD
+ if (NULL != enc_row_mt_mutex) pthread_mutex_unlock(enc_row_mt_mutex);
+#endif
+
+ return is_rd_less_than_thresh;
}
static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 4e1ca328c..8d1006b6e 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3043,7 +3043,10 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
int64_t mode_threshold[MAX_MODES];
- int *mode_map = tile_data->mode_map[bsize];
+ int *tile_mode_map = tile_data->mode_map[bsize];
+ int mode_map[MAX_MODES]; // Maintain mode_map information locally to avoid
+ // lock mechanism involved with reads from
+ // tile_mode_map
const int mode_search_skip_flags = sf->mode_search_skip_flags;
int64_t mask_filter = 0;
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
@@ -3155,10 +3158,19 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
+
+#if CONFIG_MULTITHREAD
+ if (NULL != tile_data->enc_row_mt_mutex)
+ pthread_mutex_lock(tile_data->enc_row_mt_mutex);
+#endif
+
for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
midx = sf->schedule_mode_search ? mode_skip_start : 0;
+
+ memcpy(mode_map, tile_mode_map, sizeof(mode_map));
+
while (midx > 4) {
uint8_t end_pos = 0;
for (i = 5; i < midx; ++i) {
@@ -3172,6 +3184,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
midx = end_pos;
}
+ memcpy(tile_mode_map, mode_map, sizeof(mode_map));
+
+#if CONFIG_MULTITHREAD
+ if (NULL != tile_data->enc_row_mt_mutex)
+ pthread_mutex_unlock(tile_data->enc_row_mt_mutex);
+#endif
+
for (midx = 0; midx < MAX_MODES; ++midx) {
int mode_index = mode_map[midx];
int mode_excluded = 0;
@@ -3573,6 +3592,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
}
if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
+ // If adaptive interp filter is enabled, then the current leaf node of 8x8
+ // data is needed for sub8x8. Hence preserve the context.
+ if (cpi->new_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
return;
@@ -3599,7 +3621,11 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
if (!cpi->rc.is_src_frame_alt_ref)
vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
- sf->adaptive_rd_thresh, bsize, best_mode_index);
+ sf->adaptive_rd_thresh, bsize,
+#if CONFIG_MULTITHREAD
+ tile_data->enc_row_mt_mutex,
+#endif
+ best_mode_index);
// macroblock modes
*mi = best_mbmode;
@@ -3737,7 +3763,11 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data,
(cm->interp_filter == mi->interp_filter));
vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
- cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);
+ cpi->sf.adaptive_rd_thresh, bsize,
+#if CONFIG_MULTITHREAD
+ tile_data->enc_row_mt_mutex,
+#endif
+ THR_ZEROMV);
vp9_zero(best_pred_diff);
vp9_zero(best_filter_diff);
@@ -3789,6 +3819,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
int internal_active_edge =
vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi);
+ const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
memset(x->zcoeff_blk[TX_4X4], 0, 4);
@@ -3880,7 +3911,10 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
if (!internal_active_edge &&
rd_less_than_thresh(best_rd,
rd_opt->threshes[segment_id][bsize][ref_index],
- tile_data->thresh_freq_fact[bsize][ref_index]))
+#if CONFIG_MULTITHREAD
+ tile_data->enc_row_mt_mutex,
+#endif
+ &rd_thresh_freq_fact[ref_index]))
continue;
comp_pred = second_ref_frame > INTRA_FRAME;
@@ -4324,7 +4358,11 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
!is_inter_block(&best_mbmode));
vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh,
- bsize, best_ref_index);
+ bsize,
+#if CONFIG_MULTITHREAD
+ tile_data->enc_row_mt_mutex,
+#endif
+ best_ref_index);
// macroblock modes
*mi = best_mbmode;
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index d17cad1f3..d63f8db7d 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -585,6 +585,15 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
rd->thresh_mult_sub8x8[i] = INT_MAX;
}
}
+
+ // With row based multi-threading, the following speed features
+ // have to be disabled to guarantee that bitstreams encoded with single thread
+ // and multiple threads match
+ if (cpi->oxcf.ethread_bit_match) {
+ sf->adaptive_rd_thresh = 0;
+ sf->allow_exhaustive_searches = 0;
+ sf->adaptive_pred_interp_filter = 0;
+ }
}
void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
@@ -747,4 +756,13 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
if (!cpi->oxcf.frame_periodic_boost) {
sf->max_delta_qindex = 0;
}
+
+ // With row based multi-threading, the following speed features
+ // have to be disabled to guarantee that bitstreams encoded with single thread
+ // and multiple threads match
+ if (cpi->oxcf.ethread_bit_match) {
+ sf->adaptive_rd_thresh = 0;
+ sf->allow_exhaustive_searches = 0;
+ sf->adaptive_pred_interp_filter = 0;
+ }
}
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 481189020..cc946dfd6 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -1459,6 +1459,9 @@ static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) {
cfg->ss_number_layers > 1 && cfg->ts_number_layers > 1) {
return VPX_CODEC_INVALID_PARAM;
}
+
+ vp9_set_new_mt(ctx->cpi);
+
return VPX_CODEC_OK;
}