diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_mvref_common.c | 18 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodeframe.c | 4 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodemv.c | 2 | ||||
-rw-r--r-- | vp9/decoder/vp9_decoder.h | 1 | ||||
-rw-r--r-- | vp9/decoder/vp9_dthread.c | 32 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 11 | ||||
-rw-r--r-- | vp9/encoder/vp9_ethread.c | 52 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 38 | ||||
-rw-r--r-- | vp9/vp9_cx_iface.c | 6 |
9 files changed, 91 insertions, 73 deletions
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index 51aa82411..52a731329 100644 --- a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -69,13 +69,23 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, } } - // Synchronize here for frame parallel decode if sync function is provided. - if (sync != NULL) { - sync(data, mi_row); - } + // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast + // on windows platform. The sync here is unnecessary if use_prev_frame_mvs + // is 0. But after removing it, there will be a hang in the unit test on windows + // due to several threads waiting for a thread's signal. +#if defined(_WIN32) && !HAVE_PTHREAD_H + if (cm->frame_parallel_decode && sync != NULL) { + sync(data, mi_row); + } +#endif // Check the last frame's mode and mv info. if (cm->use_prev_frame_mvs) { + // Synchronize here for frame parallel decode if sync function is provided. + if (cm->frame_parallel_decode && sync != NULL) { + sync(data, mi_row); + } + if (prev_frame_mvs->ref_frame[0] == ref_frame) { ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index c183cf38e..8840750fd 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -1926,7 +1926,7 @@ void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, // pixels of each superblock row can be changed by next superblock row. if (pbi->frame_parallel_decode) vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, - (y1 + 7) << (plane == 0 ? 0 : 1)); + MAX(0, (y1 + 7) << (plane == 0 ? 0 : 1))); // Skip border extension if block is inside the frame. 
if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || @@ -1982,7 +1982,7 @@ void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd, // pixels of each superblock row can be changed by next superblock row. if (pbi->frame_parallel_decode) vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf, - (y1 + 7) << (plane == 0 ? 0 : 1)); + MAX(0, (y1 + 7) << (plane == 0 ? 0 : 1))); } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index ec32f1d30..37bdfe058 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -457,7 +457,7 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, static void fpm_sync(void *const data, int mi_row) { VP9Decoder *const pbi = (VP9Decoder *)data; - vp9_frameworker_wait(pbi->frame_worker_owner, pbi->prev_buf, + vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame, mi_row << MI_BLOCK_SIZE_LOG2); } diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h index 10598ca3c..c19f0ac3b 100644 --- a/vp9/decoder/vp9_decoder.h +++ b/vp9/decoder/vp9_decoder.h @@ -55,7 +55,6 @@ typedef struct VP9Decoder { // TODO(hkuang): Combine this with cur_buf in macroblockd as they are // the same. RefCntBuffer *cur_buf; // Current decoding frame buffer. - RefCntBuffer *prev_buf; // Previous decoding frame buffer. VP9Worker *frame_worker_owner; // frame_worker that owns this pbi. VP9Worker lf_worker; diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c index d2a2b819c..be2e6cd15 100644 --- a/vp9/decoder/vp9_dthread.c +++ b/vp9/decoder/vp9_dthread.c @@ -38,9 +38,15 @@ void vp9_frameworker_unlock_stats(VP9Worker *const worker) { void vp9_frameworker_signal_stats(VP9Worker *const worker) { #if CONFIG_MULTITHREAD FrameWorkerData *const worker_data = worker->data1; - // TODO(hkuang): Investigate using broadcast or signal. 
+ +// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper. +#if defined(_WIN32) && !HAVE_PTHREAD_H pthread_cond_signal(&worker_data->stats_cond); #else + pthread_cond_broadcast(&worker_data->stats_cond); +#endif + +#else (void)worker; #endif } @@ -144,44 +150,22 @@ void vp9_frameworker_copy_context(VP9Worker *const dst_worker, &src_worker_data->stats_mutex); } - // src worker may have already finished decoding a frame and swapped the mi. - // TODO(hkuang): Remove following code after implenment no ModeInfo decoding. - if (src_worker_data->frame_decoded) { - dst_cm->prev_mip = src_cm->prev_mip; - dst_cm->prev_mi = src_cm->prev_mi; - } else { - dst_cm->prev_mip = src_cm->mip; - dst_cm->prev_mi = src_cm->mi; - } - dst_cm->last_frame_seg_map = src_cm->seg.enabled ? src_cm->current_frame_seg_map : src_cm->last_frame_seg_map; dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync; vp9_frameworker_unlock_stats(src_worker); - dst_worker_data->pbi->prev_buf = - src_worker_data->pbi->common.show_existing_frame ? - NULL : src_worker_data->pbi->cur_buf; - dst_cm->prev_frame = src_cm->show_existing_frame ? src_cm->prev_frame : src_cm->cur_frame; dst_cm->last_width = !src_cm->show_existing_frame ? src_cm->width : src_cm->last_width; dst_cm->last_height = !src_cm->show_existing_frame ? src_cm->height : src_cm->last_height; - dst_cm->display_width = src_cm->display_width; - dst_cm->display_height = src_cm->display_height; dst_cm->subsampling_x = src_cm->subsampling_x; dst_cm->subsampling_y = src_cm->subsampling_y; + dst_cm->frame_type = src_cm->frame_type; dst_cm->last_show_frame = !src_cm->show_existing_frame ? 
src_cm->show_frame : src_cm->last_show_frame; - dst_cm->last_frame_type = src_cm->last_frame_type; - dst_cm->frame_type = src_cm->frame_type; - dst_cm->y_dc_delta_q = src_cm->y_dc_delta_q; - dst_cm->uv_dc_delta_q = src_cm->uv_dc_delta_q; - dst_cm->uv_ac_delta_q = src_cm->uv_ac_delta_q; - dst_cm->base_qindex = src_cm->base_qindex; - for (i = 0; i < REF_FRAMES; ++i) dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i]; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index a86981a71..4949997db 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -595,9 +595,16 @@ static void choose_partitioning(VP9_COMP *cpi, for (i = 1; i <= 2; ++i) { struct macroblock_plane *p = &x->plane[i]; struct macroblockd_plane *pd = &xd->plane[i]; +#if GLOBAL_MOTION + const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); +#else const BLOCK_SIZE bs = get_plane_block_size(BLOCK_64X64, pd); - uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride); +#endif + if (bs == BLOCK_INVALID) + uv_sad = INT_MAX; + else + uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride); #if GLOBAL_MOTION x->color_sensitivity[i - 1] = uv_sad * 4 > y_sad; diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c index 9916cc06c..46814cdfd 100644 --- a/vp9/encoder/vp9_ethread.c +++ b/vp9/encoder/vp9_ethread.c @@ -79,26 +79,26 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { winterface->init(worker); if (i < num_workers - 1) { - thread_data->cpi = cpi; - - // Allocate thread data. - CHECK_MEM_ERROR(cm, thread_data->td, - vpx_memalign(32, sizeof(*thread_data->td))); - vp9_zero(*thread_data->td); - - // Set up pc_tree. - thread_data->td->leaf_tree = NULL; - thread_data->td->pc_tree = NULL; - vp9_setup_pc_tree(cm, thread_data->td); - - // Allocate frame counters in thread data. 
- CHECK_MEM_ERROR(cm, thread_data->td->counts, - vpx_calloc(1, sizeof(*thread_data->td->counts))); - - // Create threads - if (!winterface->reset(worker)) - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Tile encoder thread creation failed"); + thread_data->cpi = cpi; + + // Allocate thread data. + CHECK_MEM_ERROR(cm, thread_data->td, + vpx_memalign(32, sizeof(*thread_data->td))); + vp9_zero(*thread_data->td); + + // Set up pc_tree. + thread_data->td->leaf_tree = NULL; + thread_data->td->pc_tree = NULL; + vp9_setup_pc_tree(cm, thread_data->td); + + // Allocate frame counters in thread data. + CHECK_MEM_ERROR(cm, thread_data->td->counts, + vpx_calloc(1, sizeof(*thread_data->td->counts))); + + // Create threads + if (!winterface->reset(worker)) + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Tile encoder thread creation failed"); } else { // Main thread acts as a worker and uses the thread data in cpi. thread_data->cpi = cpi; @@ -119,10 +119,14 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) { thread_data = (EncWorkerData*)worker->data1; // Before encoding a frame, copy the thread data from cpi. - thread_data->td->mb = cpi->td.mb; - thread_data->td->rd_counts = cpi->td.rd_counts; - vpx_memcpy(thread_data->td->counts, &cpi->common.counts, - sizeof(cpi->common.counts)); + if (thread_data->td != &cpi->td) { + thread_data->td->mb = cpi->td.mb; + thread_data->td->rd_counts = cpi->td.rd_counts; + } + if (thread_data->td->counts != &cpi->common.counts) { + vpx_memcpy(thread_data->td->counts, &cpi->common.counts, + sizeof(cpi->common.counts)); + } // Handle use_nonrd_pick_mode case. 
if (cpi->sf.use_nonrd_pick_mode) { diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 2f9cccb99..4a6074e51 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -224,15 +224,6 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, *var_y = var; *sse_y = sse; - x->skip_txfm[0] = 0; - // Check if all ac coefficients can be quantized to zero. - if (var < ac_thr || var == 0) { - x->skip_txfm[0] = 2; - // Check if dc coefficient can be quantized to zero. - if (sse - var < dc_thr || sse == var) - x->skip_txfm[0] = 1; - } - if (cpi->common.tx_mode == TX_MODE_SELECT) { if (sse > (var << 2)) xd->mi[0].src_mi->mbmi.tx_size = @@ -254,6 +245,32 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); } + // Evaluate if the partition block is a skippable block in Y plane. + { + const BLOCK_SIZE unit_size = + txsize_to_bsize[xd->mi[0].src_mi->mbmi.tx_size]; + const unsigned int num_blk_log2 = + (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) + + (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]); + const unsigned int sse_tx = sse >> num_blk_log2; + const unsigned int var_tx = var >> num_blk_log2; + + x->skip_txfm[0] = 0; + // Check if all ac coefficients can be quantized to zero. + if (var_tx < ac_thr || var == 0) { + x->skip_txfm[0] = 2; + // Check if dc coefficient can be quantized to zero. 
+ if (sse_tx - var_tx < dc_thr || sse == var) + x->skip_txfm[0] = 1; + } + } + + if (x->skip_txfm[0] == 1) { + *out_rate_sum = 0; + *out_dist_sum = sse << 4; + return; + } + #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], @@ -285,9 +302,6 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, *out_rate_sum += rate; *out_dist_sum += dist << 4; - - if (*out_rate_sum == 0) - x->skip_txfm[0] = 1; } static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE bsize, diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index ef28caba4..d1dd66b2c 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -51,7 +51,7 @@ static struct vp9_extracfg default_extra_cfg = { 0, // noise_sensitivity 0, // sharpness 0, // static_thresh - 0, // tile_columns + 6, // tile_columns 0, // tile_rows 7, // arnr_max_frames 5, // arnr_strength @@ -61,7 +61,7 @@ static struct vp9_extracfg default_extra_cfg = { 0, // rc_max_inter_bitrate_pct 0, // gf_cbr_boost_pct 0, // lossless - 0, // frame_parallel_decoding_mode + 1, // frame_parallel_decoding_mode NO_AQ, // aq_mode 0, // frame_periodic_delta_q VPX_BITS_8, // Bit depth @@ -1414,7 +1414,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { 0, { // NOLINT 0, // g_usage - 0, // g_threads + 8, // g_threads 0, // g_profile 320, // g_width |