diff options
-rw-r--r-- | vp8/encoder/ethreading.c | 1 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 34 | ||||
-rw-r--r-- | vp8/encoder/onyx_int.h | 1 | ||||
-rw-r--r-- | vp8/encoder/pickinter.c | 43 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodeframe.c | 228 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodemv.c | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_denoiser.c | 49 | ||||
-rw-r--r-- | vp9/encoder/vp9_denoiser.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 47 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 2 | ||||
-rw-r--r-- | vp9/vp9_cx_iface.c | 13 | ||||
-rw-r--r-- | vpxenc.c | 3 |
13 files changed, 244 insertions, 190 deletions
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 4f689c4bc..2a0c2987b 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -518,7 +518,6 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) cpi->b_multi_threaded = 0; cpi->encoding_thread_count = 0; - cpi->b_lpf_running = 0; pthread_mutex_init(&cpi->mt_mutex, NULL); diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 93c457008..88c191e94 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1531,15 +1531,6 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) if (!oxcf) return; -#if CONFIG_MULTITHREAD - /* wait for the last picture loopfilter thread done */ - if (cpi->b_lpf_running) - { - sem_wait(&cpi->h_event_end_lpf); - cpi->b_lpf_running = 0; - } -#endif - if (cm->version != oxcf->Version) { cm->version = oxcf->Version; @@ -3638,15 +3629,6 @@ static void encode_frame_to_data_rate /* Clear down mmx registers to allow floating point in what follows */ vp8_clear_system_state(); -#if CONFIG_MULTITHREAD - /* wait for the last picture loopfilter thread done */ - if (cpi->b_lpf_running) - { - sem_wait(&cpi->h_event_end_lpf); - cpi->b_lpf_running = 0; - } -#endif - if(cpi->force_next_frame_intra) { cm->frame_type = KEY_FRAME; /* delayed intra frame */ @@ -4375,8 +4357,6 @@ static void encode_frame_to_data_rate vp8_setup_key_frame(cpi); } - - #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING { if(cpi->oxcf.error_resilient_mode) @@ -4842,7 +4822,6 @@ static void encode_frame_to_data_rate { /* start loopfilter in separate thread */ sem_post(&cpi->h_event_start_lpf); - cpi->b_lpf_running = 1; } else #endif @@ -4874,11 +4853,10 @@ static void encode_frame_to_data_rate vp8_pack_bitstream(cpi, dest, dest_end, size); #if CONFIG_MULTITHREAD - /* if PSNR packets are generated we have to wait for the lpf */ - if (cpi->b_lpf_running && cpi->b_calculate_psnr) + /* wait for the lpf thread done */ + if (cpi->b_multi_threaded) { sem_wait(&cpi->h_event_end_lpf); - 
cpi->b_lpf_running = 0; } #endif @@ -5838,14 +5816,6 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla { int ret; -#if CONFIG_MULTITHREAD - if(cpi->b_lpf_running) - { - sem_wait(&cpi->h_event_end_lpf); - cpi->b_lpf_running = 0; - } -#endif - #if CONFIG_POSTPROC cpi->common.show_frame_mi = cpi->common.mi; ret = vp8_post_proc_frame(&cpi->common, dest, flags); diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 2b2f7a0a9..6ede9b95a 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -536,7 +536,6 @@ typedef struct VP8_COMP int mt_sync_range; int b_multi_threaded; int encoding_thread_count; - int b_lpf_running; pthread_t *h_encoding_thread; pthread_t h_filter_thread; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 0ea063291..51fbe541c 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -50,7 +50,8 @@ extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; static const int skin_mean[5][2] = {{7463, 9614}, {6400, 10240}, {7040, 10240}, {8320, 9280}, {6800, 9614}}; static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16 -static const int skin_threshold[2] = {1570636, 800000}; // q18 +static const int skin_threshold[6] = {1570636, 1400000, 800000, 800000, 800000, + 800000}; // q18 // Evaluates the Mahalanobis distance measure for the input CbCr values. static int evaluate_skin_color_difference(int cb, int cr, int idx) { @@ -73,7 +74,7 @@ static int evaluate_skin_color_difference(int cb, int cr, int idx) { } // Checks if the input yCbCr values corresponds to skin color. 
-static int is_skin_color(int y, int cb, int cr) +static int is_skin_color(int y, int cb, int cr, int consec_zeromv) { if (y < 40 || y > 220) { @@ -88,13 +89,31 @@ static int is_skin_color(int y, int cb, int cr) else { int i = 0; - for (; i < 5; i++) - { - if (evaluate_skin_color_difference(cb, cr, i) < skin_threshold[1]) - { - return 1; - } - } + // No skin if block has been zero motion for long consecutive time. + if (consec_zeromv > 80) + return 0; + // Exit on grey. + if (cb == 128 && cr == 128) + return 0; + // Exit on very strong cb. + if (cb > 150 && cr < 110) + return 0; + for (; i < 5; i++) { + int skin_color_diff = evaluate_skin_color_difference(cb, cr, i); + if (skin_color_diff < skin_threshold[i + 1]) { + if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2)) + return 0; + else if (consec_zeromv > 30 && + skin_color_diff > (skin_threshold[i + 1] >> 1)) + return 0; + else + return 1; + } + // Exit if difference is much larger than the threshold. + if (skin_color_diff > (skin_threshold[i + 1] << 3)) { + return 0; + } + } return 0; } } @@ -851,8 +870,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, x->src.v_buffer[4 * x->src.uv_stride + 3] + x->src.v_buffer[4 * x->src.uv_stride + 4]) >> 2; x->is_skin = 0; - if (!cpi->oxcf.screen_content_mode) - x->is_skin = is_skin_color(y, cb, cr); + if (!cpi->oxcf.screen_content_mode) { + int block_index = mb_row * cpi->common.mb_cols + mb_col; + x->is_skin = is_skin_color(y, cb, cr, cpi->consec_zero_last[block_index]); + } } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 7fc573333..32c72194d 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -189,54 +189,31 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; - if (eob > 0) { - tran_low_t 
*const dqcoeff = pd->dqcoeff; + tran_low_t *const dqcoeff = pd->dqcoeff; + assert(eob > 0); #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); - } else { - switch (tx_size) { - case TX_4X4: - vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_8X8: - vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_16X16: - vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); - break; - default: - assert(0 && "Invalid transform size"); - } - } + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->lossless) { + vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); } else { - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_idct4x4_add(dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_idct8x8_add(dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_idct16x16_add(dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } + switch (tx_size) { + case TX_4X4: + vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_8X8: + vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_16X16: + vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_32X32: + vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + break; + default: + assert(0 && "Invalid transform size"); } } -#else + } else { if (xd->lossless) { vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { @@ -258,18 +235,40 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, return; } } + } +#else + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } 
else { + switch (tx_size) { + case TX_4X4: + vp9_idct4x4_add(dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_idct8x8_add(dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_idct16x16_add(dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } #endif // CONFIG_VP9_HIGHBITDEPTH - if (eob == 1) { - dqcoeff[0] = 0; - } else { - if (tx_size <= TX_16X16 && eob <= 10) - memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - else if (tx_size == TX_32X32 && eob <= 34) - memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - else - memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - } + if (eob == 1) { + dqcoeff[0] = 0; + } else { + if (tx_size <= TX_16X16 && eob <= 10) + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + else + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); } } @@ -279,54 +278,31 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; - if (eob > 0) { - tran_low_t *const dqcoeff = pd->dqcoeff; + tran_low_t *const dqcoeff = pd->dqcoeff; + assert(eob > 0); #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); - } else { - switch (tx_size) { - case TX_4X4: - vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_8X8: - vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_16X16: - vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); - break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); - break; - default: - assert(0 && "Invalid transform size"); - } - } + 
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->lossless) { + vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); } else { - if (xd->lossless) { - vp9_iwht4x4_add(dqcoeff, dst, stride, eob); - } else { - switch (tx_size) { - case TX_4X4: - vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_8X8: - vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_16X16: - vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); - break; - case TX_32X32: - vp9_idct32x32_add(dqcoeff, dst, stride, eob); - break; - default: - assert(0 && "Invalid transform size"); - return; - } + switch (tx_size) { + case TX_4X4: + vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_8X8: + vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_16X16: + vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_32X32: + vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + break; + default: + assert(0 && "Invalid transform size"); } } -#else + } else { if (xd->lossless) { vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { @@ -348,18 +324,40 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, return; } } + } +#else + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } #endif // CONFIG_VP9_HIGHBITDEPTH - if (eob == 1) { - dqcoeff[0] = 0; - } else { - if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) - memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); - else if (tx_size == TX_32X32 && 
eob <= 34) - memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); - else - memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); - } + if (eob == 1) { + dqcoeff[0] = 0; + } else { + if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + else + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); } } @@ -389,8 +387,10 @@ static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type]; const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, mi->segment_id); - inverse_transform_block_intra(xd, plane, tx_type, tx_size, - dst, pd->dst.stride, eob); + if (eob > 0) { + inverse_transform_block_intra(xd, plane, tx_type, tx_size, + dst, pd->dst.stride, eob); + } } } @@ -402,9 +402,11 @@ static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, mi->segment_id); - inverse_transform_block_inter(xd, plane, tx_size, - &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], - pd->dst.stride, eob); + if (eob > 0) { + inverse_transform_block_inter( + xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], + pd->dst.stride, eob); + } return eob; } diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index 3bd42ece6..596427c1e 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -463,14 +463,13 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, } } -static void dec_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, int_mv *mvlist, - int_mv *best_mv, int refmv_count) { +static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv, + int refmv_count) { int i; // Make sure all the candidates are properly clamped etc for (i = 0; i < 
refmv_count; ++i) { lower_mv_precision(&mvlist[i].as_mv, allow_hp); - clamp_mv2(&mvlist[i].as_mv, xd); *best_mv = mvlist[i]; } } @@ -778,7 +777,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi, tmp_mvs, mi_row, mi_col, -1, 0, fpm_sync, (void *)pbi); - dec_find_best_ref_mvs(xd, allow_hp, tmp_mvs, &best_ref_mvs[ref], + dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref], refmv_count); } } diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index 9bc9f26c1..9eca2a229 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -316,13 +316,14 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser, return FILTER_BLOCK; } -void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, +void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, VP9_DENOISER_DECISION *denoiser_decision) { int mv_col, mv_row; int motion_magnitude = 0; int zeromv_filter = 0; + VP9_DENOISER *denoiser = &cpi->denoiser; VP9_DENOISER_DECISION decision = COPY_BLOCK; YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME]; YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y; @@ -331,21 +332,53 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, mi_row, mi_col); struct buf_2d src = mb->plane[0].src; int is_skin = 0; + mv_col = ctx->best_sse_mv.as_mv.col; + mv_row = ctx->best_sse_mv.as_mv.row; + motion_magnitude = mv_row * mv_row + mv_col * mv_col; - if (bs <= BLOCK_32X32 && denoiser->denoising_level >= kDenLow) { + if (cpi->use_skin_detection && + bs <= BLOCK_32X32 && + denoiser->denoising_level >= kDenLow) { + int motion_level = (motion_magnitude < 16) ? 0 : 1; + // If motion for current block is small/zero, compute consec_zeromv for + // skin detection (early exit in skin detection is done for large + // consec_zeromv when current block has small/zero motion). 
+ int consec_zeromv = 0; + if (motion_level == 0) { + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + VP9_COMMON * const cm = &cpi->common; + int j, i; + // Loop through the 8x8 sub-blocks. + const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + const int block_index = mi_row * cm->mi_cols + mi_col; + consec_zeromv = 100; + for (i = 0; i < ymis; i++) { + for (j = 0; j < xmis; j++) { + int bl_index = block_index + i * cm->mi_cols + j; + consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index], consec_zeromv); + // No need to keep checking 8x8 blocks if any of the sub-blocks + // has small consec_zeromv (since threshold for no_skin based on + // zero/small motion in skin detection is high, i.e, > 5). + if (consec_zeromv < 5) { + i = ymis; + j = xmis; + } + } + } + } + // TODO(marpan): Compute skin detection over sub-blocks. is_skin = vp9_compute_skin_block(mb->plane[0].src.buf, mb->plane[1].src.buf, mb->plane[2].src.buf, mb->plane[0].src.stride, mb->plane[1].src.stride, bs, - 0, - 0); + consec_zeromv, + motion_level); } - - mv_col = ctx->best_sse_mv.as_mv.col; - mv_row = ctx->best_sse_mv.as_mv.row; - motion_magnitude = mv_row * mv_row + mv_col * mv_col; if (!is_skin && denoiser->denoising_level == kDenHigh && motion_magnitude < 16) { diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h index 9f13bd533..a0e201781 100644 --- a/vp9/encoder/vp9_denoiser.h +++ b/vp9/encoder/vp9_denoiser.h @@ -53,7 +53,7 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, int refresh_last_frame, int resized); -void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, +void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx , VP9_DENOISER_DECISION *denoiser_decision); diff --git a/vp9/encoder/vp9_encodeframe.c 
b/vp9/encoder/vp9_encodeframe.c index 84593836c..40b332ac8 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -772,36 +772,55 @@ static int choose_partitioning(VP9_COMP *cpi, vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); // Check if most of the superblock is skin content, and if so, force split - // to 32x32. Avoid checking superblocks on/near boundary and avoid low - // resolutons for now. + // to 32x32, and set x->sb_is_skin for use in mode selection. + // Avoid checking superblocks on/near boundary and avoid low resolutions. // Note superblock may still pick 64X64 if y_sad is very small // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is. x->sb_is_skin = 0; #if !CONFIG_VP9_HIGHBITDEPTH if (cpi->use_skin_detection && !low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 && mi_row + 8 < cm->mi_rows)) { + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + int bl_index1, bl_index2, bl_index3; int num_16x16_skin = 0; int num_16x16_nonskin = 0; + int is_skin = 0; + int consec_zeromv = 0; uint8_t *ysignal = x->plane[0].src.buf; uint8_t *usignal = x->plane[1].src.buf; uint8_t *vsignal = x->plane[2].src.buf; int spuv = x->plane[1].src.stride; - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - int is_skin = vp9_compute_skin_block(ysignal, - usignal, - vsignal, - sp, - spuv, - BLOCK_16X16, - 0, - 0); + const int block_index = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; + const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + // Loop through the 16x16 sub-blocks. 
+ int j, i; + for (i = 0; i < ymis; i+=2) { + for (j = 0; j < xmis; j+=2) { + int bl_index = block_index + i * cm->mi_cols + j; + bl_index1 = bl_index + 1; + bl_index2 = bl_index + cm->mi_cols; + bl_index3 = bl_index2 + 1; + consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index], + VPXMIN(cr->consec_zero_mv[bl_index1], + VPXMIN(cr->consec_zero_mv[bl_index2], + cr->consec_zero_mv[bl_index3]))); + is_skin = vp9_compute_skin_block(ysignal, + usignal, + vsignal, + sp, + spuv, + BLOCK_16X16, + consec_zeromv, + 0); num_16x16_skin += is_skin; num_16x16_nonskin += (1 - is_skin); if (num_16x16_nonskin > 3) { // Exit loop if at least 4 of the 16x16 blocks are not skin. - i = 4; - j = 4; + i = ymis; + j = xmis; } ysignal += 16; usignal += 8; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 3ea2ccd88..8b2e98549 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1825,8 +1825,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->oxcf.noise_sensitivity > 0 && cpi->resize_pending == 0) { VP9_DENOISER_DECISION decision = COPY_BLOCK; - vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, - VPXMAX(BLOCK_8X8, bsize), ctx, &decision); + vp9_denoiser_denoise(cpi, x, mi_row, mi_col, VPXMAX(BLOCK_8X8, bsize), + ctx, &decision); // If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on denoised // result. Only do this under noise conditions, and if rdcost of ZEROMV on // original source is not significantly higher than rdcost of best mode. diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index c014ca174..02be3c3f9 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -400,6 +400,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH; sf->tx_size_search_method = is_keyframe ? 
USE_LARGESTALL : USE_TX_8X8; sf->simple_model_rd_from_var = 1; + if (cpi->oxcf.rc_mode == VPX_VBR) + sf->mv.search_method = NSTEP; if (!is_keyframe) { int i; diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 624d5c9fc..5921636d3 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -158,7 +158,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); - RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); + RANGE_CHECK(cfg, g_timebase.num, 1, 1000000000); RANGE_CHECK_HI(cfg, g_profile, 3); RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); @@ -486,7 +486,16 @@ static vpx_codec_err_t set_encoder_config( oxcf->content = extra_cfg->content; oxcf->tile_columns = extra_cfg->tile_columns; - oxcf->tile_rows = extra_cfg->tile_rows; + + // TODO(yunqing): The dependencies between row tiles cause error in multi- + // threaded encoding. For now, tile_rows is forced to be 0 in this case. + // The further fix can be done by adding synchronizations after a tile row + // is encoded. But this will hurt multi-threaded encoder performance. So, + // it is recommended to use tile-rows=0 while encoding with threads > 1. 
+ if (oxcf->max_threads > 1 && oxcf->tile_columns > 0) + oxcf->tile_rows = 0; + else + oxcf->tile_rows = extra_cfg->tile_rows; oxcf->error_resilient_mode = cfg->g_error_resilient; oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode; diff --git a/vpxenc.c b/vpxenc.c --- a/vpxenc.c +++ b/vpxenc.c @@ -380,7 +380,8 @@ static const arg_def_t cpu_used_vp9 = ARG_DEF( static const arg_def_t tile_cols = ARG_DEF( NULL, "tile-columns", 1, "Number of tile columns to use, log2"); static const arg_def_t tile_rows = ARG_DEF( - NULL, "tile-rows", 1, "Number of tile rows to use, log2"); + NULL, "tile-rows", 1, + "Number of tile rows to use, log2 (set to 0 while threads > 1)"); static const arg_def_t lossless = ARG_DEF( NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)"); static const arg_def_t frame_parallel_decoding = ARG_DEF( |