summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- vp8/encoder/ethreading.c | 1
-rw-r--r-- vp8/encoder/onyx_if.c | 34
-rw-r--r-- vp8/encoder/onyx_int.h | 1
-rw-r--r-- vp8/encoder/pickinter.c | 43
-rw-r--r-- vp9/decoder/vp9_decodeframe.c | 228
-rw-r--r-- vp9/decoder/vp9_decodemv.c | 7
-rw-r--r-- vp9/encoder/vp9_denoiser.c | 49
-rw-r--r-- vp9/encoder/vp9_denoiser.h | 2
-rw-r--r-- vp9/encoder/vp9_encodeframe.c | 47
-rw-r--r-- vp9/encoder/vp9_pickmode.c | 4
-rw-r--r-- vp9/encoder/vp9_speed_features.c | 2
-rw-r--r-- vp9/vp9_cx_iface.c | 13
-rw-r--r-- vpxenc.c | 3
13 files changed, 244 insertions, 190 deletions
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 4f689c4bc..2a0c2987b 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -518,7 +518,6 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi)
cpi->b_multi_threaded = 0;
cpi->encoding_thread_count = 0;
- cpi->b_lpf_running = 0;
pthread_mutex_init(&cpi->mt_mutex, NULL);
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 93c457008..88c191e94 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1531,15 +1531,6 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
if (!oxcf)
return;
-#if CONFIG_MULTITHREAD
- /* wait for the last picture loopfilter thread done */
- if (cpi->b_lpf_running)
- {
- sem_wait(&cpi->h_event_end_lpf);
- cpi->b_lpf_running = 0;
- }
-#endif
-
if (cm->version != oxcf->Version)
{
cm->version = oxcf->Version;
@@ -3638,15 +3629,6 @@ static void encode_frame_to_data_rate
/* Clear down mmx registers to allow floating point in what follows */
vp8_clear_system_state();
-#if CONFIG_MULTITHREAD
- /* wait for the last picture loopfilter thread done */
- if (cpi->b_lpf_running)
- {
- sem_wait(&cpi->h_event_end_lpf);
- cpi->b_lpf_running = 0;
- }
-#endif
-
if(cpi->force_next_frame_intra)
{
cm->frame_type = KEY_FRAME; /* delayed intra frame */
@@ -4375,8 +4357,6 @@ static void encode_frame_to_data_rate
vp8_setup_key_frame(cpi);
}
-
-
#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
{
if(cpi->oxcf.error_resilient_mode)
@@ -4842,7 +4822,6 @@ static void encode_frame_to_data_rate
{
/* start loopfilter in separate thread */
sem_post(&cpi->h_event_start_lpf);
- cpi->b_lpf_running = 1;
}
else
#endif
@@ -4874,11 +4853,10 @@ static void encode_frame_to_data_rate
vp8_pack_bitstream(cpi, dest, dest_end, size);
#if CONFIG_MULTITHREAD
- /* if PSNR packets are generated we have to wait for the lpf */
- if (cpi->b_lpf_running && cpi->b_calculate_psnr)
+ /* wait for the lpf thread done */
+ if (cpi->b_multi_threaded)
{
sem_wait(&cpi->h_event_end_lpf);
- cpi->b_lpf_running = 0;
}
#endif
@@ -5838,14 +5816,6 @@ int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppfla
{
int ret;
-#if CONFIG_MULTITHREAD
- if(cpi->b_lpf_running)
- {
- sem_wait(&cpi->h_event_end_lpf);
- cpi->b_lpf_running = 0;
- }
-#endif
-
#if CONFIG_POSTPROC
cpi->common.show_frame_mi = cpi->common.mi;
ret = vp8_post_proc_frame(&cpi->common, dest, flags);
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 2b2f7a0a9..6ede9b95a 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -536,7 +536,6 @@ typedef struct VP8_COMP
int mt_sync_range;
int b_multi_threaded;
int encoding_thread_count;
- int b_lpf_running;
pthread_t *h_encoding_thread;
pthread_t h_filter_thread;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 0ea063291..51fbe541c 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -50,7 +50,8 @@ extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
static const int skin_mean[5][2] =
{{7463, 9614}, {6400, 10240}, {7040, 10240}, {8320, 9280}, {6800, 9614}};
static const int skin_inv_cov[4] = {4107, 1663, 1663, 2157}; // q16
-static const int skin_threshold[2] = {1570636, 800000}; // q18
+static const int skin_threshold[6] = {1570636, 1400000, 800000, 800000, 800000,
+ 800000}; // q18
// Evaluates the Mahalanobis distance measure for the input CbCr values.
static int evaluate_skin_color_difference(int cb, int cr, int idx) {
@@ -73,7 +74,7 @@ static int evaluate_skin_color_difference(int cb, int cr, int idx) {
}
// Checks if the input yCbCr values corresponds to skin color.
-static int is_skin_color(int y, int cb, int cr)
+static int is_skin_color(int y, int cb, int cr, int consec_zeromv)
{
if (y < 40 || y > 220)
{
@@ -88,13 +89,31 @@ static int is_skin_color(int y, int cb, int cr)
else
{
int i = 0;
- for (; i < 5; i++)
- {
- if (evaluate_skin_color_difference(cb, cr, i) < skin_threshold[1])
- {
- return 1;
- }
- }
+ // No skin if block has been zero motion for long consecutive time.
+ if (consec_zeromv > 80)
+ return 0;
+ // Exit on grey.
+ if (cb == 128 && cr == 128)
+ return 0;
+ // Exit on very strong cb.
+ if (cb > 150 && cr < 110)
+ return 0;
+ for (; i < 5; i++) {
+ int skin_color_diff = evaluate_skin_color_difference(cb, cr, i);
+ if (skin_color_diff < skin_threshold[i + 1]) {
+ if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2))
+ return 0;
+ else if (consec_zeromv > 30 &&
+ skin_color_diff > (skin_threshold[i + 1] >> 1))
+ return 0;
+ else
+ return 1;
+ }
+ // Exit if difference is much large than the threshold.
+ if (skin_color_diff > (skin_threshold[i + 1] << 3)) {
+ return 0;
+ }
+ }
return 0;
}
}
@@ -851,8 +870,10 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
x->src.v_buffer[4 * x->src.uv_stride + 3] +
x->src.v_buffer[4 * x->src.uv_stride + 4]) >> 2;
x->is_skin = 0;
- if (!cpi->oxcf.screen_content_mode)
- x->is_skin = is_skin_color(y, cb, cr);
+ if (!cpi->oxcf.screen_content_mode) {
+ int block_index = mb_row * cpi->common.mb_cols + mb_col;
+ x->is_skin = is_skin_color(y, cb, cr, cpi->consec_zero_last[block_index]);
+ }
}
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity) {
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 7fc573333..32c72194d 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -189,54 +189,31 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane,
uint8_t *dst, int stride,
int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- if (eob > 0) {
- tran_low_t *const dqcoeff = pd->dqcoeff;
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+ assert(eob > 0);
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (xd->lossless) {
- vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
- } else {
- switch (tx_size) {
- case TX_4X4:
- vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_8X8:
- vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_16X16:
- vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_32X32:
- vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- default:
- assert(0 && "Invalid transform size");
- }
- }
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ if (xd->lossless) {
+ vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
} else {
- if (xd->lossless) {
- vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
- } else {
- switch (tx_size) {
- case TX_4X4:
- vp9_idct4x4_add(dqcoeff, dst, stride, eob);
- break;
- case TX_8X8:
- vp9_idct8x8_add(dqcoeff, dst, stride, eob);
- break;
- case TX_16X16:
- vp9_idct16x16_add(dqcoeff, dst, stride, eob);
- break;
- case TX_32X32:
- vp9_idct32x32_add(dqcoeff, dst, stride, eob);
- break;
- default:
- assert(0 && "Invalid transform size");
- return;
- }
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_8X8:
+ vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_16X16:
+ vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_32X32:
+ vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
}
}
-#else
+ } else {
if (xd->lossless) {
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
} else {
@@ -258,18 +235,40 @@ static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane,
return;
}
}
+ }
+#else
+ if (xd->lossless) {
+ vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
+ } else {
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_idct4x4_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_8X8:
+ vp9_idct8x8_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_16X16:
+ vp9_idct16x16_add(dqcoeff, dst, stride, eob);
+ break;
+ case TX_32X32:
+ vp9_idct32x32_add(dqcoeff, dst, stride, eob);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ return;
+ }
+ }
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (eob == 1) {
- dqcoeff[0] = 0;
- } else {
- if (tx_size <= TX_16X16 && eob <= 10)
- memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
- else if (tx_size == TX_32X32 && eob <= 34)
- memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
- else
- memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
- }
+ if (eob == 1) {
+ dqcoeff[0] = 0;
+ } else {
+ if (tx_size <= TX_16X16 && eob <= 10)
+ memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
+ else if (tx_size == TX_32X32 && eob <= 34)
+ memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
+ else
+ memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
}
}
@@ -279,54 +278,31 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane,
uint8_t *dst, int stride,
int eob) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- if (eob > 0) {
- tran_low_t *const dqcoeff = pd->dqcoeff;
+ tran_low_t *const dqcoeff = pd->dqcoeff;
+ assert(eob > 0);
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (xd->lossless) {
- vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
- } else {
- switch (tx_size) {
- case TX_4X4:
- vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_8X8:
- vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_16X16:
- vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
- break;
- case TX_32X32:
- vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
- break;
- default:
- assert(0 && "Invalid transform size");
- }
- }
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ if (xd->lossless) {
+ vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd);
} else {
- if (xd->lossless) {
- vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
- } else {
- switch (tx_size) {
- case TX_4X4:
- vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_8X8:
- vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_16X16:
- vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
- break;
- case TX_32X32:
- vp9_idct32x32_add(dqcoeff, dst, stride, eob);
- break;
- default:
- assert(0 && "Invalid transform size");
- return;
- }
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_8X8:
+ vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_16X16:
+ vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ case TX_32X32:
+ vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
}
}
-#else
+ } else {
if (xd->lossless) {
vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
} else {
@@ -348,18 +324,40 @@ static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane,
return;
}
}
+ }
+#else
+ if (xd->lossless) {
+ vp9_iwht4x4_add(dqcoeff, dst, stride, eob);
+ } else {
+ switch (tx_size) {
+ case TX_4X4:
+ vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_8X8:
+ vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_16X16:
+ vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob);
+ break;
+ case TX_32X32:
+ vp9_idct32x32_add(dqcoeff, dst, stride, eob);
+ break;
+ default:
+ assert(0 && "Invalid transform size");
+ return;
+ }
+ }
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (eob == 1) {
- dqcoeff[0] = 0;
- } else {
- if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
- memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
- else if (tx_size == TX_32X32 && eob <= 34)
- memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
- else
- memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
- }
+ if (eob == 1) {
+ dqcoeff[0] = 0;
+ } else {
+ if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10)
+ memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0]));
+ else if (tx_size == TX_32X32 && eob <= 34)
+ memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0]));
+ else
+ memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0]));
}
}
@@ -389,8 +387,10 @@ static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd,
&vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type];
const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size,
r, mi->segment_id);
- inverse_transform_block_intra(xd, plane, tx_type, tx_size,
- dst, pd->dst.stride, eob);
+ if (eob > 0) {
+ inverse_transform_block_intra(xd, plane, tx_type, tx_size,
+ dst, pd->dst.stride, eob);
+ }
}
}
@@ -402,9 +402,11 @@ static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r,
const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r,
mi->segment_id);
- inverse_transform_block_inter(xd, plane, tx_size,
- &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
- pd->dst.stride, eob);
+ if (eob > 0) {
+ inverse_transform_block_inter(
+ xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
+ pd->dst.stride, eob);
+ }
return eob;
}
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index 3bd42ece6..596427c1e 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -463,14 +463,13 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
}
}
-static void dec_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, int_mv *mvlist,
- int_mv *best_mv, int refmv_count) {
+static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv,
+ int refmv_count) {
int i;
// Make sure all the candidates are properly clamped etc
for (i = 0; i < refmv_count; ++i) {
lower_mv_precision(&mvlist[i].as_mv, allow_hp);
- clamp_mv2(&mvlist[i].as_mv, xd);
*best_mv = mvlist[i];
}
}
@@ -778,7 +777,7 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
tmp_mvs, mi_row, mi_col, -1, 0,
fpm_sync, (void *)pbi);
- dec_find_best_ref_mvs(xd, allow_hp, tmp_mvs, &best_ref_mvs[ref],
+ dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref],
refmv_count);
}
}
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index 9bc9f26c1..9eca2a229 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -316,13 +316,14 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
return FILTER_BLOCK;
}
-void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
+void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
PICK_MODE_CONTEXT *ctx,
VP9_DENOISER_DECISION *denoiser_decision) {
int mv_col, mv_row;
int motion_magnitude = 0;
int zeromv_filter = 0;
+ VP9_DENOISER *denoiser = &cpi->denoiser;
VP9_DENOISER_DECISION decision = COPY_BLOCK;
YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
@@ -331,21 +332,53 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
mi_row, mi_col);
struct buf_2d src = mb->plane[0].src;
int is_skin = 0;
+ mv_col = ctx->best_sse_mv.as_mv.col;
+ mv_row = ctx->best_sse_mv.as_mv.row;
+ motion_magnitude = mv_row * mv_row + mv_col * mv_col;
- if (bs <= BLOCK_32X32 && denoiser->denoising_level >= kDenLow) {
+ if (cpi->use_skin_detection &&
+ bs <= BLOCK_32X32 &&
+ denoiser->denoising_level >= kDenLow) {
+ int motion_level = (motion_magnitude < 16) ? 0 : 1;
+ // If motion for current block is small/zero, compute consec_zeromv for
+ // skin detection (early exit in skin detection is done for large
+ // consec_zeromv when current block has small/zero motion).
+ int consec_zeromv = 0;
+ if (motion_level == 0) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ VP9_COMMON * const cm = &cpi->common;
+ int j, i;
+ // Loop through the 8x8 sub-blocks.
+ const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
+ const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+ const int block_index = mi_row * cm->mi_cols + mi_col;
+ consec_zeromv = 100;
+ for (i = 0; i < ymis; i++) {
+ for (j = 0; j < xmis; j++) {
+ int bl_index = block_index + i * cm->mi_cols + j;
+ consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index], consec_zeromv);
+ // No need to keep checking 8x8 blocks if any of the sub-blocks
+ // has small consec_zeromv (since threshold for no_skin based on
+ // zero/small motion in skin detection is high, i.e, > 5).
+ if (consec_zeromv < 5) {
+ i = ymis;
+ j = xmis;
+ }
+ }
+ }
+ }
+ // TODO(marpan): Compute skin detection over sub-blocks.
is_skin = vp9_compute_skin_block(mb->plane[0].src.buf,
mb->plane[1].src.buf,
mb->plane[2].src.buf,
mb->plane[0].src.stride,
mb->plane[1].src.stride,
bs,
- 0,
- 0);
+ consec_zeromv,
+ motion_level);
}
-
- mv_col = ctx->best_sse_mv.as_mv.col;
- mv_row = ctx->best_sse_mv.as_mv.row;
- motion_magnitude = mv_row * mv_row + mv_col * mv_col;
if (!is_skin &&
denoiser->denoising_level == kDenHigh &&
motion_magnitude < 16) {
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h
index 9f13bd533..a0e201781 100644
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -53,7 +53,7 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
int refresh_last_frame,
int resized);
-void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
+void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb,
int mi_row, int mi_col, BLOCK_SIZE bs,
PICK_MODE_CONTEXT *ctx ,
VP9_DENOISER_DECISION *denoiser_decision);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 84593836c..40b332ac8 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -772,36 +772,55 @@ static int choose_partitioning(VP9_COMP *cpi,
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
// Check if most of the superblock is skin content, and if so, force split
- // to 32x32. Avoid checking superblocks on/near boundary and avoid low
- // resolutons for now.
+ // to 32x32, and set x->sb_is_skin for use in mode selection.
+ // Avoid checking superblocks on/near boundary and avoid low resolutions.
// Note superblock may still pick 64X64 if y_sad is very small
// (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is.
x->sb_is_skin = 0;
#if !CONFIG_VP9_HIGHBITDEPTH
if (cpi->use_skin_detection && !low_res && (mi_col >= 8 &&
mi_col + 8 < cm->mi_cols && mi_row >= 8 && mi_row + 8 < cm->mi_rows)) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ int bl_index1, bl_index2, bl_index3;
int num_16x16_skin = 0;
int num_16x16_nonskin = 0;
+ int is_skin = 0;
+ int consec_zeromv = 0;
uint8_t *ysignal = x->plane[0].src.buf;
uint8_t *usignal = x->plane[1].src.buf;
uint8_t *vsignal = x->plane[2].src.buf;
int spuv = x->plane[1].src.stride;
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- int is_skin = vp9_compute_skin_block(ysignal,
- usignal,
- vsignal,
- sp,
- spuv,
- BLOCK_16X16,
- 0,
- 0);
+ const int block_index = mi_row * cm->mi_cols + mi_col;
+ const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
+ const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+ // Loop through the 16x16 sub-blocks.
+ int j, i;
+ for (i = 0; i < ymis; i+=2) {
+ for (j = 0; j < xmis; j+=2) {
+ int bl_index = block_index + i * cm->mi_cols + j;
+ bl_index1 = bl_index + 1;
+ bl_index2 = bl_index + cm->mi_cols;
+ bl_index3 = bl_index2 + 1;
+ consec_zeromv = VPXMIN(cr->consec_zero_mv[bl_index],
+ VPXMIN(cr->consec_zero_mv[bl_index1],
+ VPXMIN(cr->consec_zero_mv[bl_index2],
+ cr->consec_zero_mv[bl_index3])));
+ is_skin = vp9_compute_skin_block(ysignal,
+ usignal,
+ vsignal,
+ sp,
+ spuv,
+ BLOCK_16X16,
+ consec_zeromv,
+ 0);
num_16x16_skin += is_skin;
num_16x16_nonskin += (1 - is_skin);
if (num_16x16_nonskin > 3) {
// Exit loop if at least 4 of the 16x16 blocks are not skin.
- i = 4;
- j = 4;
+ i = ymis;
+ j = xmis;
}
ysignal += 16;
usignal += 8;
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 3ea2ccd88..8b2e98549 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1825,8 +1825,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->oxcf.noise_sensitivity > 0 &&
cpi->resize_pending == 0) {
VP9_DENOISER_DECISION decision = COPY_BLOCK;
- vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col,
- VPXMAX(BLOCK_8X8, bsize), ctx, &decision);
+ vp9_denoiser_denoise(cpi, x, mi_row, mi_col, VPXMAX(BLOCK_8X8, bsize),
+ ctx, &decision);
// If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on denoised
// result. Only do this under noise conditions, and if rdcost of ZEROMV on
// original source is not significantly higher than rdcost of best mode.
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index c014ca174..02be3c3f9 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -400,6 +400,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH;
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
sf->simple_model_rd_from_var = 1;
+ if (cpi->oxcf.rc_mode == VPX_VBR)
+ sf->mv.search_method = NSTEP;
if (!is_keyframe) {
int i;
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 624d5c9fc..5921636d3 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -158,7 +158,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available
RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
- RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
+ RANGE_CHECK(cfg, g_timebase.num, 1, 1000000000);
RANGE_CHECK_HI(cfg, g_profile, 3);
RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
@@ -486,7 +486,16 @@ static vpx_codec_err_t set_encoder_config(
oxcf->content = extra_cfg->content;
oxcf->tile_columns = extra_cfg->tile_columns;
- oxcf->tile_rows = extra_cfg->tile_rows;
+
+ // TODO(yunqing): The dependencies between row tiles cause error in multi-
+ // threaded encoding. For now, tile_rows is forced to be 0 in this case.
+ // The further fix can be done by adding synchronizations after a tile row
+ // is encoded. But this will hurt multi-threaded encoder performance. So,
+ // it is recommended to use tile-rows=0 while encoding with threads > 1.
+ if (oxcf->max_threads > 1 && oxcf->tile_columns > 0)
+ oxcf->tile_rows = 0;
+ else
+ oxcf->tile_rows = extra_cfg->tile_rows;
oxcf->error_resilient_mode = cfg->g_error_resilient;
oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode;
diff --git a/vpxenc.c b/vpxenc.c
index f24b1805b..50e7c7fc7 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -380,7 +380,8 @@ static const arg_def_t cpu_used_vp9 = ARG_DEF(
static const arg_def_t tile_cols = ARG_DEF(
NULL, "tile-columns", 1, "Number of tile columns to use, log2");
static const arg_def_t tile_rows = ARG_DEF(
- NULL, "tile-rows", 1, "Number of tile rows to use, log2");
+ NULL, "tile-rows", 1,
+ "Number of tile rows to use, log2 (set to 0 while threads > 1)");
static const arg_def_t lossless = ARG_DEF(
NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)");
static const arg_def_t frame_parallel_decoding = ARG_DEF(