diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 74 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 353 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 17 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 15 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 7 | ||||
-rw-r--r-- | vp9/encoder/x86/temporal_filter_sse4.c | 4 | ||||
-rw-r--r-- | vp9/vp9_common.mk | 3 | ||||
-rw-r--r-- | vp9/vp9cx.mk | 3 |
12 files changed, 414 insertions, 77 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 5adefac1a..679a62b5a 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3440,6 +3440,45 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, #undef FEATURES #undef LABELS +// Perform fast and coarse motion search for the given block. This is a +// pre-processing step for the ML based partition search speedup. +static void simple_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, + int mi_row, int mi_col, MV ref_mv, + MV_REFERENCE_FRAME ref, uint8_t *pred_buf) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mi = xd->mi[0]; + const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_buffer(cpi, ref); + const int step_param = 1; + const MvLimits tmp_mv_limits = x->mv_limits; + const SEARCH_METHODS search_method = NSTEP; + const int sadpb = x->sadperbit16; + MV ref_mv_full = { ref_mv.row >> 3, ref_mv.col >> 3 }; + MV best_mv = { 0, 0 }; + int cost_list[5]; + + assert(yv12 != NULL); + if (!yv12) return; + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, + &cm->frame_refs[ref - 1].sf); + mi->ref_frame[0] = ref; + mi->ref_frame[1] = NONE; + mi->sb_type = bsize; + vp9_set_mv_search_range(&x->mv_limits, &ref_mv); + vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_method, + sadpb, cond_cost_list(cpi, cost_list), &ref_mv, + &best_mv, 0, 0); + best_mv.row *= 8; + best_mv.col *= 8; + x->mv_limits = tmp_mv_limits; + mi->mv[0].as_mv = best_mv; + + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); + xd->plane[0].dst.buf = pred_buf; + xd->plane[0].dst.stride = 64; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); +} + // Use a neural net model to prune partition-none and partition-split search. // The model uses prediction residue variance and quantization step size as // input features. @@ -3448,10 +3487,9 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int *none, int *split) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi[0]; const NN_CONFIG *nn_config = NULL; #if CONFIG_VP9_HIGHBITDEPTH + MACROBLOCKD *xd = &x->e_mbd; DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? (CONVERT_TO_BYTEPTR(pred_buffer)) @@ -3489,41 +3527,13 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, if (!nn_config) return; - mi->ref_frame[1] = NONE; - mi->sb_type = bsize; // Do a simple single motion search to find a prediction for current block. // The variance of the residue will be used as input features. { + const MV ref_mv = { 0, 0 }; const MV_REFERENCE_FRAME ref = cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; - YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref); - MV ref_mv = { 0, 0 }; - MV ref_mv_full = { 0, 0 }; - const int step_param = 1; - const MvLimits tmp_mv_limits = x->mv_limits; - const SEARCH_METHODS search_method = NSTEP; - const int sadpb = x->sadperbit16; - MV best_mv = { 0, 0 }; - int cost_list[5]; - - assert(yv12 != NULL); - if (!yv12) return; - vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, - &cm->frame_refs[ref - 1].sf); - mi->ref_frame[0] = ref; - vp9_set_mv_search_range(&x->mv_limits, &ref_mv); - vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, - search_method, sadpb, cond_cost_list(cpi, cost_list), - &ref_mv, &best_mv, 0, 0); - best_mv.row *= 8; - best_mv.col *= 8; - x->mv_limits = tmp_mv_limits; - mi->mv[0].as_mv = best_mv; - - set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); - xd->plane[0].dst.buf = pred_buf; - xd->plane[0].dst.stride = 64; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf); } vpx_clear_system_state(); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 11037cd14..eaeb3d96e 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -29,6 +29,9 @@ #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_idct.h" +#if CONFIG_NON_GREEDY_MV +#include "vp9/common/vp9_mvref_common.h" +#endif #if CONFIG_VP9_POSTPROC #include "vp9/common/vp9_postproc.h" #endif @@ -2570,6 +2573,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vpx_free(cpi->feature_score_loc_arr); vpx_free(cpi->feature_score_loc_sort); vpx_free(cpi->feature_score_loc_heap); + vpx_free(cpi->select_mv_arr); #endif for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { #if CONFIG_NON_GREEDY_MV @@ -5838,31 +5842,6 @@ static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, } } -#if CONFIG_NON_GREEDY_MV -double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows, int cols) { - double IxIx = 0; - double IxIy = 0; - double IyIy = 0; - double score; - int r, c; - vpx_clear_system_state(); - for (r = 0; r + 1 < rows; ++r) { - for (c = 0; c + 1 < cols; ++c) { - int diff_x = buf[r * stride + c] - buf[r * stride + c + 1]; - int diff_y = buf[r * stride + c] - buf[(r + 1) * stride + c]; - IxIx += diff_x * diff_x; - IxIy += diff_x * diff_y; - IyIy += diff_y * diff_y; - } - } - IxIx /= (rows - 1) * (cols - 1); - IxIy /= (rows - 1) * (cols - 1); - IyIy /= (rows - 1) * (cols - 1); - score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001); - return score; -} -#endif - static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row, int mi_col) { x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); @@ -6035,6 +6014,326 @@ static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, } #if CONFIG_NON_GREEDY_MV +static void get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture, + int frame_idx, int rf_idx, int mi_row, + int mi_col, struct buf_2d *src, + struct buf_2d *pre) { + const int mb_y_offset = + mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + YV12_BUFFER_CONFIG *ref_frame = NULL; + int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; + if (ref_frame_idx != -1) { + ref_frame = gf_picture[ref_frame_idx].frame; + } + src->buf = xd->cur_buf->y_buffer + mb_y_offset; + src->stride = xd->cur_buf->y_stride; + pre->buf = ref_frame->y_buffer + mb_y_offset; + pre->stride = ref_frame->y_stride; + assert(src->stride == pre->stride); +} + +#define kMvPreCheckLines 5 +#define kMvPreCheckSize 15 +#define ZERO_MV_MODE 0 +#define NEW_MV_MODE 1 +#define NEAREST_MV_MODE 2 +#define NEAR_MV_MODE 3 +#define MAX_MV_MODE 4 + +#define MV_REF_POS_NUM 3 +POSITION mv_ref_pos[MV_REF_POS_NUM] = { + { -1, 0 }, + { 0, -1 }, + { -1, -1 }, +}; + +static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row, + int mi_col) { + return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col]; +} + +static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + int i; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + int_mv nearest_mv, near_mv, invalid_mv; + nearest_mv.as_int = INVALID_MV; + near_mv.as_int = INVALID_MV; + invalid_mv.as_int = INVALID_MV; + for (i = 0; i < MV_REF_POS_NUM; ++i) { + int nb_row = mi_row + mv_ref_pos[i].row * mi_height; + int nb_col = mi_col + mv_ref_pos[i].col * mi_width; + assert(mv_ref_pos[i].row <= 0); + assert(mv_ref_pos[i].col <= 0); + if (nb_row >= 0 && nb_col >= 0) { + if (nearest_mv.as_int == INVALID_MV) { + nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col); + } else { + int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col); + if (mv.as_int == nearest_mv.as_int) { + continue; + } else { + near_mv = mv; + break; + } + } + } + } + if (nearest_mv.as_int == INVALID_MV) { + nearest_mv.as_mv.row = 0; + nearest_mv.as_mv.col = 0; + } + if (near_mv.as_int == INVALID_MV) { + near_mv.as_mv.row = 0; + near_mv.as_mv.col = 0; + } + if (mv_mode == NEAREST_MV_MODE) { + return nearest_mv; + } + if (mv_mode == NEAR_MV_MODE) { + return near_mv; + } + assert(0); + return invalid_mv; +} + +static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi, + TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + int_mv mv; + switch (mv_mode) { + case ZERO_MV_MODE: + mv.as_mv.row = 0; + mv.as_mv.col = 0; + break; + case NEW_MV_MODE: + mv = *get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col); + break; + case NEAREST_MV_MODE: + mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col); + break; + case NEAR_MV_MODE: + mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col); + break; + default: + mv.as_int = INVALID_MV; + assert(0); + break; + } + return mv; +} + +static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd, + GF_PICTURE *gf_picture, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, + int mi_row, int mi_col, int_mv *mv) { + uint32_t sse; + struct buf_2d src; + struct buf_2d pre; + MV full_mv; + *mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, + mi_col); + full_mv = get_full_mv(&mv->as_mv); + get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col, + &src, &pre); + // TODO(angiebird): Consider subpixel when computing the sse. + cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv), + pre.stride, &sse); + return (double)sse; +} + +static double get_mv_cost(int mv_mode) { + // TODO(angiebird): Implement this function. + (void)mv_mode; + return 0; +} + +static double rd_cost(int rdmult, int rddiv, double rate, double dist) { + return (rate * rdmult) / (1 << 9) + dist * (1 << rddiv); +} + +static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x, + GF_PICTURE *gf_picture, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, + int mi_row, int mi_col, int_mv *mv) { + MACROBLOCKD *xd = &x->e_mbd; + double mv_dist = get_mv_dist(mv_mode, cpi, xd, gf_picture, frame_idx, + tpl_frame, rf_idx, bsize, mi_row, mi_col, mv); + double mv_cost = get_mv_cost(mv_mode); + return rd_cost(x->rdmult, x->rddiv, mv_cost, mv_dist); +} + +static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, + GF_PICTURE *gf_picture, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col, + double *rd, int_mv *mv) { + int best_mv_mode = ZERO_MV_MODE; + int update = 0; + int mv_mode; + *rd = 0; + for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) { + double this_rd; + int_mv this_mv; + if (mv_mode == NEW_MV_MODE) { + continue; + } + this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, frame_idx, tpl_frame, + rf_idx, bsize, mi_row, mi_col, &this_mv); + if (update == 0) { + *rd = this_rd; + *mv = this_mv; + best_mv_mode = mv_mode; + update = 1; + } else { + if (this_rd < *rd) { + *rd = this_rd; + *mv = this_mv; + best_mv_mode = mv_mode; + } + } + } + return best_mv_mode; +} + +static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, + GF_PICTURE *gf_picture, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col, + double *rd) { + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + int tmp_mv_mode_arr[kMvPreCheckSize]; + int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx]; + int_mv *select_mv_arr = cpi->select_mv_arr; + int_mv tmp_select_mv_arr[kMvPreCheckSize]; + int stride = tpl_frame->stride; + double new_mv_rd = 0; + double no_new_mv_rd = 0; + int idx; + int tmp_idx; + assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1); + + // no new mv + // diagnal scan order + tmp_idx = 0; + for (idx = 0; idx < kMvPreCheckSize; ++idx) { + int r; + for (r = 0; r <= idx; ++r) { + int c = idx - r; + int nb_row = mi_row + r * mi_height; + int nb_col = mi_col + c * mi_width; + if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { + double this_rd; + int_mv *mv = &select_mv_arr[nb_row * stride + nb_col]; + mv_mode_arr[nb_row * stride + nb_col] = + find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, + rf_idx, bsize, nb_row, nb_col, &this_rd, mv); + no_new_mv_rd += this_rd; + tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col]; + tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col]; + ++tmp_idx; + } + } + } + + // new mv + mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE; + new_mv_rd = eval_mv_mode(NEW_MV_MODE, cpi, x, gf_picture, frame_idx, + tpl_frame, rf_idx, bsize, mi_row, mi_col, + &select_mv_arr[mi_row * stride + mi_col]); + // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE + // beforehand. + for (idx = 1; idx < kMvPreCheckSize; ++idx) { + int r; + for (r = 0; r <= idx; ++r) { + int c = idx - r; + int nb_row = mi_row + r * mi_height; + int nb_col = mi_col + c * mi_width; + if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { + double this_rd; + int_mv *mv = &select_mv_arr[nb_row * stride + nb_col]; + mv_mode_arr[nb_row * stride + nb_col] = + find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, + rf_idx, bsize, nb_row, nb_col, &this_rd, mv); + new_mv_rd += this_rd; + } + } + } + + // update best_mv_mode + tmp_idx = 0; + if (no_new_mv_rd < new_mv_rd) { + *rd = no_new_mv_rd; + for (idx = 0; idx < kMvPreCheckSize; ++idx) { + int r; + for (r = 0; r <= idx; ++r) { + int c = idx - r; + int nb_row = mi_row + r * mi_height; + int nb_col = mi_col + c * mi_width; + if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { + mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx]; + select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx]; + ++tmp_idx; + } + } + } + } else { + *rd = new_mv_rd; + } +} + +void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x, GF_PICTURE *gf_picture, + int frame_idx, TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize) { + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int unit_rows = tpl_frame->mi_rows / mi_height; + const int unit_cols = tpl_frame->mi_cols / mi_width; + const int max_diagonal_lines = unit_rows + unit_cols - 1; + int idx; + for (idx = 0; idx < max_diagonal_lines; ++idx) { + int r; + for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1); + ++r) { + double rd; // TODO(angiebird): Use this information later. + int c = idx - r; + int mi_row = r * mi_height; + int mi_col = c * mi_width; + assert(c >= 0 && c < unit_cols); + assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows); + assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols); + predict_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx, bsize, + mi_row, mi_col, &rd); + } + } +} + +static double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows, + int cols) { + double IxIx = 0; + double IxIy = 0; + double IyIy = 0; + double score; + int r, c; + vpx_clear_system_state(); + for (r = 0; r + 1 < rows; ++r) { + for (c = 0; c + 1 < cols; ++c) { + int diff_x = buf[r * stride + c] - buf[r * stride + c + 1]; + int diff_y = buf[r * stride + c] - buf[(r + 1) * stride + c]; + IxIx += diff_x * diff_x; + IxIy += diff_x * diff_y; + IyIy += diff_y * diff_y; + } + } + IxIx /= (rows - 1) * (cols - 1); + IxIy /= (rows - 1) * (cols - 1); + IyIy /= (rows - 1) * (cols - 1); + score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001); + return score; +} + static int compare_feature_score(const void *a, const void *b) { const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a; const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b; @@ -6460,6 +6759,10 @@ static void init_tpl_buffer(VP9_COMP *cpi) { cpi->feature_score_loc_alloc = 1; } + vpx_free(cpi->select_mv_arr); + CHECK_MEM_ERROR( + cm, cpi->select_mv_arr, + vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr))); #endif // TODO(jingning): Reduce the actual memory use for tpl model build up. diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index cb9ea2de2..a690ebc73 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -320,6 +320,7 @@ typedef struct TplDepFrame { double mv_dist_sum[3]; double mv_cost_sum[3]; int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES]; + int *mv_mode_arr[3]; #endif } TplDepFrame; @@ -590,6 +591,7 @@ typedef struct VP9_COMP { FEATURE_SCORE_LOC *feature_score_loc_arr; FEATURE_SCORE_LOC **feature_score_loc_sort; FEATURE_SCORE_LOC **feature_score_loc_heap; + int_mv *select_mv_arr; #endif TileDataEnc *tile_data; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 8f0da48a2..5cfffe6b5 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -549,7 +549,7 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) { } #define FP_DN_THRESH 8 -#define FP_MAX_DN_THRESH 16 +#define FP_MAX_DN_THRESH 24 #define KERNEL_SIZE 3 // Baseline Kernal weights for first pass noise metric @@ -843,6 +843,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, double mb_intra_factor; double mb_brightness_factor; double mb_neutral_count; + int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH); // First pass code requires valid last and new frame buffers. assert(new_yv12 != NULL); @@ -1254,7 +1255,6 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, } } #endif - // Does the row vector point inwards or outwards? if (mb_row < cm->mb_rows / 2) { if (mv.row > 0) @@ -1280,14 +1280,13 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, else if (mv.col < 0) --(fp_acc_data->sum_in_vectors); } - fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF; - } else if (this_intra_error < scale_sse_threshold(cm, LOW_I_THRESH)) { + } + if (this_intra_error < scaled_low_intra_thresh) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); - } else { // 0,0 mv but high error + } else { fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF; } } else { // Intra < inter error - int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH); if (this_intra_error < scaled_low_intra_thresh) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); if (this_motion_error < scaled_low_intra_thresh) { @@ -2399,8 +2398,12 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise, twopass->arnr_strength_adjustment = 0; - if ((section_zeromv < 0.10) || (section_noise <= (SECTION_NOISE_DEF * 0.75))) + if (section_noise < 150) { twopass->arnr_strength_adjustment -= 1; + if (section_noise < 75) twopass->arnr_strength_adjustment -= 1; + } else if (section_noise > 250) + twopass->arnr_strength_adjustment += 1; + if (section_zeromv > 0.50) twopass->arnr_strength_adjustment += 1; } diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 534b15acc..602cc5798 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -29,11 +29,6 @@ // #define NEW_DIAMOND_SEARCH -static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, - const MV *mv) { - return &buf->buf[mv->row * buf->stride + mv->col]; -} - void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 6bef88747..779e8d8e7 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -38,6 +38,11 @@ typedef struct search_site_config { int total_steps; } search_site_config; +static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, + const MV *mv) { + return &buf->buf[mv->row * buf->stride + mv->col]; +} + void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride); void vp9_init3smotion_compensation(search_site_config *cfg, int stride); @@ -143,7 +148,6 @@ static INLINE MV get_full_mv(const MV *mv) { out_mv.col = mv->col >> 3; return out_mv; } - struct TplDepFrame; void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row, int mi_col, int rf_idx, BLOCK_SIZE bsize, diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index a3240513f..0fdc61649 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1683,6 +1683,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, unsigned int sse_zeromv_normalized = UINT_MAX; unsigned int best_sse_sofar = UINT_MAX; int gf_temporal_ref = 0; + int force_test_gf_zeromv = 0; #if CONFIG_VP9_TEMPORAL_DENOISING VP9_PICKMODE_CTX_DEN ctx_den; int64_t zero_last_cost_orig = INT64_MAX; @@ -1939,6 +1940,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, flag_svc_subpel = 1; } + // For SVC with quality layers, when QP of lower layer is lower + // than current layer: force check of GF-ZEROMV before early exit + // due to skip flag. + if (svc->spatial_layer_id > 0 && usable_ref_frame == GOLDEN_FRAME && + no_scaling && cm->base_qindex > svc->lower_layer_qindex + 10) + force_test_gf_zeromv = 1; + for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) { int rate_mv = 0; int mode_rd_thresh; @@ -2349,11 +2357,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (reuse_inter_pred) free_pred_buffer(this_mode_pred); } - if (x->skip) break; + if (x->skip && + (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME])) + break; // If early termination flag is 1 and at least 2 modes are checked, // the mode search is terminated. - if (best_early_term && idx > 0 && !scene_change_detected) { + if (best_early_term && idx > 0 && !scene_change_detected && + (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME])) { x->skip = 1; break; } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c1a079ff0..c73b0ed87 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -3034,7 +3034,7 @@ static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x, if (content_type == VP9E_CONTENT_FILM) { if (src_rec_min <= VERY_LOW_VAR_THRESH) { if (ref_frame == INTRA_FRAME) *this_rd *= 2; - if (bsize > 6) *this_rd *= 2; + if (bsize > BLOCK_16X16) *this_rd *= 2; } } } diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 04b589ac3..ee5f0e56c 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -192,7 +192,8 @@ static INLINE int mod_index(int sum_dist, int index, int rounding, int strength, assert(index >= 0 && index <= 13); assert(index_mult[index] != 0); - mod = (clamp(sum_dist, 0, UINT16_MAX) * index_mult[index]) >> 16; + mod = + ((unsigned int)clamp(sum_dist, 0, UINT16_MAX) * index_mult[index]) >> 16; mod += rounding; mod >>= strength; @@ -680,7 +681,9 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, src_variance = vp9_get_sby_perpixel_variance(cpi, &src, TF_BLOCK); #endif // CONFIG_VP9_HIGHBITDEPTH - if (src_variance <= 2) strength = VPXMAX(0, (int)strength - 2); + if (src_variance <= 2) { + strength = VPXMAX(0, arnr_filter_data->strength - 2); + } } for (frame = 0; frame < frame_count; frame++) { diff --git a/vp9/encoder/x86/temporal_filter_sse4.c b/vp9/encoder/x86/temporal_filter_sse4.c index 18c4f02d5..b560e2218 100644 --- a/vp9/encoder/x86/temporal_filter_sse4.c +++ b/vp9/encoder/x86/temporal_filter_sse4.c @@ -967,8 +967,8 @@ static void vp9_apply_temporal_filter_chroma_8( v_sum_row_2 = v_sum_row_3; // Add chroma values - u_sum_row = _mm_adds_epu8(u_sum_row_1, u_sum_row_2); - v_sum_row = _mm_adds_epu8(v_sum_row_1, v_sum_row_2); + u_sum_row = _mm_adds_epu16(u_sum_row_1, u_sum_row_2); + v_sum_row = _mm_adds_epu16(v_sum_row_1, v_sum_row_2); // Add luma values add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row); diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 7ca4004b0..c9a55669e 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -64,9 +64,12 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c +ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c +endif # !CONFIG_VP9_HIGHBITDEPTH + VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_VSX) += common/ppc/vp9_idct_vsx.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk index e16152a6d..67e5389a7 100644 --- a/vp9/vp9cx.mk +++ b/vp9/vp9cx.mk @@ -138,10 +138,13 @@ VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_frame_scale_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c + +ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h +endif # !CONFIG_VP9_HIGHBITDEPTH VP9_CX_SRCS-$(HAVE_VSX) += encoder/ppc/vp9_quantize_vsx.c |