diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/encoder/vp9_block.h | 6 |
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 57 |
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 34 |
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 13 |
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 4 |
5 files changed, 92 insertions, 22 deletions
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index bbdfbb823..069c33564 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -146,9 +146,9 @@ struct macroblock { uint8_t sb_is_skin; // Used to save the status of whether a block has a low variance in - // choose_partitioning. 0 for 64x64, 1 2 for 64x32, 3 4 for 32x64, 5~8 for - // 32x32. - uint8_t variance_low[9]; + // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for + // 32x32, 9~24 for 16x16. + uint8_t variance_low[25]; void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride); void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 21a66bb1b..29d31de52 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -773,7 +773,7 @@ static int choose_partitioning(VP9_COMP *cpi, } } - for (i = 0; i < 9; i++) { + for (i = 0; i < 25; i++) { x->variance_low[i] = 0; } @@ -1083,28 +1083,53 @@ static int choose_partitioning(VP9_COMP *cpi, } if (cpi->sf.short_circuit_low_temp_var) { - // Set low variance flag, only for blocks >= 32x32 and if LAST_FRAME was - // selected. - if (ref_frame_partition == LAST_FRAME) { + int mv_thr = cm->width > 640 ? 8 : 4; + // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected + // and int_pro mv is small. If the temporal variance is small set the + // variance_low flag for the block. The variance threshold can be adjusted, + // the higher the more aggressive. 
+ if (ref_frame_partition == LAST_FRAME && + (cpi->sf.short_circuit_low_temp_var == 1 || + (xd->mi[0]->mv[0].as_mv.col < mv_thr && + xd->mi[0]->mv[0].as_mv.col > -mv_thr && + xd->mi[0]->mv[0].as_mv.row < mv_thr && + xd->mi[0]->mv[0].as_mv.row > -mv_thr))) { if (xd->mi[0]->sb_type == BLOCK_64X64 && vt.part_variances.none.variance < (thresholds[0] >> 1)) { x->variance_low[0] = 1; } else if (xd->mi[0]->sb_type == BLOCK_64X32) { - if (vt.part_variances.horz[0].variance < (thresholds[0] >> 2)) - x->variance_low[1] = 1; - if (vt.part_variances.horz[1].variance < (thresholds[0] >> 2)) - x->variance_low[2] = 1; + for (j = 0; j < 2; j++) { + if (vt.part_variances.horz[j].variance < (thresholds[0] >> 2)) + x->variance_low[j + 1] = 1; + } } else if (xd->mi[0]->sb_type == BLOCK_32X64) { - if (vt.part_variances.vert[0].variance < (thresholds[0] >> 2)) - x->variance_low[3] = 1; - if (vt.part_variances.vert[1].variance < (thresholds[0] >> 2)) - x->variance_low[4] = 1; + for (j = 0; j < 2; j++) { + if (vt.part_variances.vert[j].variance < (thresholds[0] >> 2)) + x->variance_low[j + 3] = 1; + } } else { - // 32x32 for (i = 0; i < 4; i++) { - if (!force_split[i + 1] && - vt.split[i].part_variances.none.variance < (thresholds[1] >> 1)) - x->variance_low[i + 5] = 1; + if (!force_split[i + 1]) { + // 32x32 + if (vt.split[i].part_variances.none.variance < + (thresholds[1] >> 1)) + x->variance_low[i + 5] = 1; + } else if (cpi->sf.short_circuit_low_temp_var == 2) { + int idx[4] = {0, 4, xd->mi_stride << 2, (xd->mi_stride << 2) + 4}; + const int idx_str = cm->mi_stride * mi_row + mi_col + idx[i]; + MODE_INFO **this_mi = cm->mi_grid_visible + idx_str; + // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block + // inside. 
+ if ((*this_mi)->sb_type == BLOCK_16X16 || + (*this_mi)->sb_type == BLOCK_32X16 || + (*this_mi)->sb_type == BLOCK_16X32) { + for (j = 0; j < 4; j++) { + if (vt.split[i].split[j].part_variances.none.variance < + (thresholds[2] >> 8)) + x->variance_low[(i << 2) + j + 9] = 1; + } + } + } } } } diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 554409b74..7e3bc1b66 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -40,6 +40,14 @@ typedef struct { int in_use; } PRED_BUFFER; + +static const int pos_shift_16x16[4][4] = { + {9, 10, 13, 14}, + {11, 12, 15, 16}, + {17, 18, 21, 22}, + {19, 20, 23, 24} +}; + static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd, @@ -1274,6 +1282,8 @@ static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low, int mi_row, int mi_col, BLOCK_SIZE bsize) { int force_skip_low_temp_var = 0; + int i = (mi_row & 0x7) >> 1; + int j = (mi_col & 0x7) >> 1; // Set force_skip_low_temp_var based on the block size and block offset. if (bsize == BLOCK_64X64) { force_skip_low_temp_var = variance_low[0]; @@ -1299,6 +1309,19 @@ static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low, } else if ((mi_col & 0x7) && (mi_row & 0x7)) { force_skip_low_temp_var = variance_low[8]; } + } else if (bsize == BLOCK_16X16) { + force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]]; + } else if (bsize == BLOCK_32X16) { + // The col shift index for the second 16x16 block. + int j2 = ((mi_col + 2) & 0x7) >> 1; + // Only if each 16x16 block inside has low temporal variance. + force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] && + variance_low[pos_shift_16x16[i][j2]]; + } else if (bsize == BLOCK_16X32) { + // The row shift index for the second 16x16 block. 
+ int i2 = ((mi_row + 2) & 0x7) >> 1; + force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] && + variance_low[pos_shift_16x16[i2][j]]; } return force_skip_low_temp_var; } @@ -1503,6 +1526,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; } + if (cpi->sf.short_circuit_low_temp_var == 2 && + force_skip_low_temp_var && ref_frame == LAST_FRAME && + this_mode == NEWMV) { + continue; + } + if (cpi->use_svc) { if (svc_force_zero_mode[ref_frame - 1] && frame_mv[this_mode][ref_frame].as_int != 0) @@ -1842,8 +1871,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh; } // Perform intra prediction search, if the best SAD is above a certain - // threshold. Skip intra prediction if force_skip_low_temp_var is set. - if (!force_skip_low_temp_var && perform_intra_pred && + // threshold. + if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) && + perform_intra_pred && (best_rdc.rdcost == INT64_MAX || (!x->skip && best_rdc.rdcost > inter_mode_thresh && bsize <= cpi->sf.max_intra_bsize))) { diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 0090b4f40..e7f04a244 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -429,7 +429,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->mv.search_method = NSTEP; sf->mv.reduce_first_step_size = 1; sf->skip_encode_sb = 0; - if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.pass == 0 && + if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && content != VP9E_CONTENT_SCREEN) { // Enable short circuit for low temporal variance. sf->short_circuit_low_temp_var = 1; @@ -450,6 +450,17 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->adaptive_rd_thresh = 4; sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2; sf->lpf_pick = LPF_PICK_MINIMAL_LPF; + // Only keep INTRA_DC mode for speed 8. 
+ if (!is_keyframe) { + int i = 0; + for (i = 0; i < BLOCK_SIZES; ++i) + sf->intra_y_mode_bsize_mask[i] = INTRA_DC; + } + if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && + content != VP9E_CONTENT_SCREEN) { + // More aggressive short circuit for speed 8. + sf->short_circuit_low_temp_var = 2; + } } } diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 71ff0ac10..e88a7dfff 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -449,6 +449,10 @@ typedef struct SPEED_FEATURES { // Skip a number of expensive mode evaluations for blocks with very low // temporal variance. + // 1: Skip golden non-zeromv and ALL INTRA for bsize >= 32x32. + // 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16, skip ALL + // INTRA for bsize >= 32x32 and vert/horz INTRA for bsize 16x16, 16x32 and + // 32x16. int short_circuit_low_temp_var; } SPEED_FEATURES; |