summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/vp9_mvref_common.c 18
-rw-r--r-- vp9/decoder/vp9_decodeframe.c 4
-rw-r--r-- vp9/decoder/vp9_decodemv.c 2
-rw-r--r-- vp9/decoder/vp9_decoder.h 1
-rw-r--r-- vp9/decoder/vp9_dthread.c 32
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 11
-rw-r--r-- vp9/encoder/vp9_ethread.c 52
-rw-r--r-- vp9/encoder/vp9_pickmode.c 38
-rw-r--r-- vp9/vp9_cx_iface.c 6
9 files changed, 91 insertions, 73 deletions
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 51aa82411..52a731329 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -69,13 +69,23 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
}
- // Synchronize here for frame parallel decode if sync function is provided.
- if (sync != NULL) {
- sync(data, mi_row);
- }
+ // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast
+ // on windows platform. The sync here is unnecessary if use_prev_frame_mvs
+ // is 0. But after removing it, there will be a hang in the unit test on
+ // windows due to several threads waiting for a thread's signal.
+#if defined(_WIN32) && !HAVE_PTHREAD_H
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+#endif
// Check the last frame's mode and mv info.
if (cm->use_prev_frame_mvs) {
+ // Synchronize here for frame parallel decode if sync function is provided.
+ if (cm->frame_parallel_decode && sync != NULL) {
+ sync(data, mi_row);
+ }
+
if (prev_frame_mvs->ref_frame[0] == ref_frame) {
ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done);
} else if (prev_frame_mvs->ref_frame[1] == ref_frame) {
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index c183cf38e..8840750fd 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1926,7 +1926,7 @@ void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
// pixels of each superblock row can be changed by next superblock row.
if (pbi->frame_parallel_decode)
vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
- (y1 + 7) << (plane == 0 ? 0 : 1));
+ MAX(0, (y1 + 7) << (plane == 0 ? 0 : 1)));
// Skip border extension if block is inside the frame.
if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
@@ -1982,7 +1982,7 @@ void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
// pixels of each superblock row can be changed by next superblock row.
if (pbi->frame_parallel_decode)
vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
- (y1 + 7) << (plane == 0 ? 0 : 1));
+ MAX(0, (y1 + 7) << (plane == 0 ? 0 : 1)));
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index ec32f1d30..37bdfe058 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -457,7 +457,7 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
static void fpm_sync(void *const data, int mi_row) {
VP9Decoder *const pbi = (VP9Decoder *)data;
- vp9_frameworker_wait(pbi->frame_worker_owner, pbi->prev_buf,
+ vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
mi_row << MI_BLOCK_SIZE_LOG2);
}
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 10598ca3c..c19f0ac3b 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -55,7 +55,6 @@ typedef struct VP9Decoder {
// TODO(hkuang): Combine this with cur_buf in macroblockd as they are
// the same.
RefCntBuffer *cur_buf; // Current decoding frame buffer.
- RefCntBuffer *prev_buf; // Previous decoding frame buffer.
VP9Worker *frame_worker_owner; // frame_worker that owns this pbi.
VP9Worker lf_worker;
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index d2a2b819c..be2e6cd15 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -38,9 +38,15 @@ void vp9_frameworker_unlock_stats(VP9Worker *const worker) {
void vp9_frameworker_signal_stats(VP9Worker *const worker) {
#if CONFIG_MULTITHREAD
FrameWorkerData *const worker_data = worker->data1;
- // TODO(hkuang): Investigate using broadcast or signal.
+
+// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper.
+#if defined(_WIN32) && !HAVE_PTHREAD_H
pthread_cond_signal(&worker_data->stats_cond);
#else
+ pthread_cond_broadcast(&worker_data->stats_cond);
+#endif
+
+#else
(void)worker;
#endif
}
@@ -144,44 +150,22 @@ void vp9_frameworker_copy_context(VP9Worker *const dst_worker,
&src_worker_data->stats_mutex);
}
- // src worker may have already finished decoding a frame and swapped the mi.
- // TODO(hkuang): Remove following code after implenment no ModeInfo decoding.
- if (src_worker_data->frame_decoded) {
- dst_cm->prev_mip = src_cm->prev_mip;
- dst_cm->prev_mi = src_cm->prev_mi;
- } else {
- dst_cm->prev_mip = src_cm->mip;
- dst_cm->prev_mi = src_cm->mi;
- }
-
dst_cm->last_frame_seg_map = src_cm->seg.enabled ?
src_cm->current_frame_seg_map : src_cm->last_frame_seg_map;
dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync;
vp9_frameworker_unlock_stats(src_worker);
- dst_worker_data->pbi->prev_buf =
- src_worker_data->pbi->common.show_existing_frame ?
- NULL : src_worker_data->pbi->cur_buf;
-
dst_cm->prev_frame = src_cm->show_existing_frame ?
src_cm->prev_frame : src_cm->cur_frame;
dst_cm->last_width = !src_cm->show_existing_frame ?
src_cm->width : src_cm->last_width;
dst_cm->last_height = !src_cm->show_existing_frame ?
src_cm->height : src_cm->last_height;
- dst_cm->display_width = src_cm->display_width;
- dst_cm->display_height = src_cm->display_height;
dst_cm->subsampling_x = src_cm->subsampling_x;
dst_cm->subsampling_y = src_cm->subsampling_y;
+ dst_cm->frame_type = src_cm->frame_type;
dst_cm->last_show_frame = !src_cm->show_existing_frame ?
src_cm->show_frame : src_cm->last_show_frame;
- dst_cm->last_frame_type = src_cm->last_frame_type;
- dst_cm->frame_type = src_cm->frame_type;
- dst_cm->y_dc_delta_q = src_cm->y_dc_delta_q;
- dst_cm->uv_dc_delta_q = src_cm->uv_dc_delta_q;
- dst_cm->uv_ac_delta_q = src_cm->uv_ac_delta_q;
- dst_cm->base_qindex = src_cm->base_qindex;
-
for (i = 0; i < REF_FRAMES; ++i)
dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i];
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index a86981a71..4949997db 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -595,9 +595,16 @@ static void choose_partitioning(VP9_COMP *cpi,
for (i = 1; i <= 2; ++i) {
struct macroblock_plane *p = &x->plane[i];
struct macroblockd_plane *pd = &xd->plane[i];
+#if GLOBAL_MOTION
+ const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
+#else
const BLOCK_SIZE bs = get_plane_block_size(BLOCK_64X64, pd);
- uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride);
+#endif
+ if (bs == BLOCK_INVALID)
+ uv_sad = INT_MAX;
+ else
+ uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride);
#if GLOBAL_MOTION
x->color_sensitivity[i - 1] = uv_sad * 4 > y_sad;
diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c
index 9916cc06c..46814cdfd 100644
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -79,26 +79,26 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
winterface->init(worker);
if (i < num_workers - 1) {
- thread_data->cpi = cpi;
-
- // Allocate thread data.
- CHECK_MEM_ERROR(cm, thread_data->td,
- vpx_memalign(32, sizeof(*thread_data->td)));
- vp9_zero(*thread_data->td);
-
- // Set up pc_tree.
- thread_data->td->leaf_tree = NULL;
- thread_data->td->pc_tree = NULL;
- vp9_setup_pc_tree(cm, thread_data->td);
-
- // Allocate frame counters in thread data.
- CHECK_MEM_ERROR(cm, thread_data->td->counts,
- vpx_calloc(1, sizeof(*thread_data->td->counts)));
-
- // Create threads
- if (!winterface->reset(worker))
- vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
- "Tile encoder thread creation failed");
+ thread_data->cpi = cpi;
+
+ // Allocate thread data.
+ CHECK_MEM_ERROR(cm, thread_data->td,
+ vpx_memalign(32, sizeof(*thread_data->td)));
+ vp9_zero(*thread_data->td);
+
+ // Set up pc_tree.
+ thread_data->td->leaf_tree = NULL;
+ thread_data->td->pc_tree = NULL;
+ vp9_setup_pc_tree(cm, thread_data->td);
+
+ // Allocate frame counters in thread data.
+ CHECK_MEM_ERROR(cm, thread_data->td->counts,
+ vpx_calloc(1, sizeof(*thread_data->td->counts)));
+
+ // Create threads
+ if (!winterface->reset(worker))
+ vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
+ "Tile encoder thread creation failed");
} else {
// Main thread acts as a worker and uses the thread data in cpi.
thread_data->cpi = cpi;
@@ -119,10 +119,14 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
thread_data = (EncWorkerData*)worker->data1;
// Before encoding a frame, copy the thread data from cpi.
- thread_data->td->mb = cpi->td.mb;
- thread_data->td->rd_counts = cpi->td.rd_counts;
- vpx_memcpy(thread_data->td->counts, &cpi->common.counts,
- sizeof(cpi->common.counts));
+ if (thread_data->td != &cpi->td) {
+ thread_data->td->mb = cpi->td.mb;
+ thread_data->td->rd_counts = cpi->td.rd_counts;
+ }
+ if (thread_data->td->counts != &cpi->common.counts) {
+ vpx_memcpy(thread_data->td->counts, &cpi->common.counts,
+ sizeof(cpi->common.counts));
+ }
// Handle use_nonrd_pick_mode case.
if (cpi->sf.use_nonrd_pick_mode) {
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 2f9cccb99..4a6074e51 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -224,15 +224,6 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
*var_y = var;
*sse_y = sse;
- x->skip_txfm[0] = 0;
- // Check if all ac coefficients can be quantized to zero.
- if (var < ac_thr || var == 0) {
- x->skip_txfm[0] = 2;
- // Check if dc coefficient can be quantized to zero.
- if (sse - var < dc_thr || sse == var)
- x->skip_txfm[0] = 1;
- }
-
if (cpi->common.tx_mode == TX_MODE_SELECT) {
if (sse > (var << 2))
xd->mi[0].src_mi->mbmi.tx_size =
@@ -254,6 +245,32 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
}
+ // Evaluate if the partition block is a skippable block in Y plane.
+ {
+ const BLOCK_SIZE unit_size =
+ txsize_to_bsize[xd->mi[0].src_mi->mbmi.tx_size];
+ const unsigned int num_blk_log2 =
+ (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) +
+ (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]);
+ const unsigned int sse_tx = sse >> num_blk_log2;
+ const unsigned int var_tx = var >> num_blk_log2;
+
+ x->skip_txfm[0] = 0;
+ // Check if all ac coefficients can be quantized to zero.
+ if (var_tx < ac_thr || var == 0) {
+ x->skip_txfm[0] = 2;
+ // Check if dc coefficient can be quantized to zero.
+ if (sse_tx - var_tx < dc_thr || sse == var)
+ x->skip_txfm[0] = 1;
+ }
+ }
+
+ if (x->skip_txfm[0] == 1) {
+ *out_rate_sum = 0;
+ *out_dist_sum = sse << 4;
+ return;
+ }
+
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
@@ -285,9 +302,6 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
*out_rate_sum += rate;
*out_dist_sum += dist << 4;
-
- if (*out_rate_sum == 0)
- x->skip_txfm[0] = 1;
}
static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize,
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index ef28caba4..d1dd66b2c 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -51,7 +51,7 @@ static struct vp9_extracfg default_extra_cfg = {
0, // noise_sensitivity
0, // sharpness
0, // static_thresh
- 0, // tile_columns
+ 6, // tile_columns
0, // tile_rows
7, // arnr_max_frames
5, // arnr_strength
@@ -61,7 +61,7 @@ static struct vp9_extracfg default_extra_cfg = {
0, // rc_max_inter_bitrate_pct
0, // gf_cbr_boost_pct
0, // lossless
- 0, // frame_parallel_decoding_mode
+ 1, // frame_parallel_decoding_mode
NO_AQ, // aq_mode
0, // frame_periodic_delta_q
VPX_BITS_8, // Bit depth
@@ -1414,7 +1414,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
0,
{ // NOLINT
0, // g_usage
- 0, // g_threads
+ 8, // g_threads
0, // g_profile
320, // g_width