diff options
-rw-r--r-- | vp9/encoder/vp9_aq_cyclicrefresh.c | 124 | ||||
-rw-r--r-- | vp9/encoder/vp9_aq_cyclicrefresh.h | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_denoiser.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 13 | ||||
-rw-r--r-- | vp9/encoder/vp9_ethread.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_mbgraph.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 111 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 23 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_temporal_filter.c | 6 | ||||
-rw-r--r-- | vpx_dsp/arm/highbd_idct32x32_135_add_neon.c | 8 | ||||
-rw-r--r-- | vpx_dsp/arm/highbd_idct32x32_34_add_neon.c | 8 | ||||
-rw-r--r-- | vpx_dsp/arm/idct32x32_135_add_neon.c | 14 | ||||
-rw-r--r-- | vpx_dsp/arm/idct32x32_34_add_neon.c | 14 | ||||
-rw-r--r-- | vpx_dsp/arm/idct_neon.h | 4 |
18 files changed, 172 insertions, 196 deletions
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index b6b859268..825577746 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -240,24 +240,68 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi, } } -// Update the actual number of blocks that were applied the segment delta q. +// From the just encoded frame: update the actual number of blocks that were +// applied the segment delta q, and the amount of low motion in the frame. +// Also check conditions for forcing golden update, or preventing golden +// update if the period is up. void vp9_cyclic_refresh_postencode(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; + MODE_INFO **mi = cm->mi_grid_visible; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + RATE_CONTROL *const rc = &cpi->rc; unsigned char *const seg_map = cpi->segmentation_map; + double fraction_low = 0.0; + int force_gf_refresh = 0; + int low_content_frame = 0; int mi_row, mi_col; cr->actual_num_seg1_blocks = 0; cr->actual_num_seg2_blocks = 0; - for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) + for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { - if (cyclic_refresh_segment_id(seg_map[mi_row * cm->mi_cols + mi_col]) == - CR_SEGMENT_ID_BOOST1) + MV mv = mi[0]->mv[0].as_mv; + int map_index = mi_row * cm->mi_cols + mi_col; + if (cyclic_refresh_segment_id(seg_map[map_index]) == CR_SEGMENT_ID_BOOST1) cr->actual_num_seg1_blocks++; - else if (cyclic_refresh_segment_id( - seg_map[mi_row * cm->mi_cols + mi_col]) == + else if (cyclic_refresh_segment_id(seg_map[map_index]) == CR_SEGMENT_ID_BOOST2) cr->actual_num_seg2_blocks++; + // Accumulate low_content_frame. + if (is_inter_block(mi[0]) && abs(mv.row) < 16 && abs(mv.col) < 16) + low_content_frame++; + mi++; + } + mi += 8; + } + // Check for golden frame update: only for non-SVC and non-golden boost. + if (!cpi->use_svc && cpi->ext_refresh_frame_flags_pending == 0 && + !cpi->oxcf.gf_cbr_boost_pct) { + // Force this frame as a golden update frame if this frame changes the + // resolution (resize_pending != 0). + // TODO(marpan): check on forcing golden update if the background has very + // high motion in current frame. + if (cpi->resize_pending != 0) { + vp9_cyclic_refresh_set_golden_update(cpi); + rc->frames_till_gf_update_due = rc->baseline_gf_interval; + if (rc->frames_till_gf_update_due > rc->frames_to_key) + rc->frames_till_gf_update_due = rc->frames_to_key; + cpi->refresh_golden_frame = 1; + force_gf_refresh = 1; + } + // Update average of low content/motion in the frame. + fraction_low = (double)low_content_frame / (cm->mi_rows * cm->mi_cols); + cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4; + if (!force_gf_refresh && cpi->refresh_golden_frame == 1 && + rc->frames_since_key > rc->frames_since_golden + 1) { + // Don't update golden reference if the amount of low_content for the + // current encoded frame is small, or if the recursive average of the + // low_content over the update interval window falls below threshold. + if (fraction_low < 0.65 || cr->low_content_avg < 0.6) { + cpi->refresh_golden_frame = 0; + } + // Reset for next internal. + cr->low_content_avg = fraction_low; } + } } // Set golden frame update interval, for non-svc 1 pass CBR mode. @@ -274,72 +318,6 @@ void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) { if (cpi->oxcf.rc_mode == VPX_VBR) rc->baseline_gf_interval = 20; } -// Update some encoding stats (from the just encoded frame). If this frame's -// background has high motion, refresh the golden frame. Otherwise, if the -// golden reference is to be updated check if we should NOT update the golden -// ref. -void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) { - VP9_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - int mi_row, mi_col; - double fraction_low = 0.0; - int low_content_frame = 0; - MODE_INFO **mi = cm->mi_grid_visible; - RATE_CONTROL *const rc = &cpi->rc; - const int rows = cm->mi_rows, cols = cm->mi_cols; - int cnt1 = 0, cnt2 = 0; - int force_gf_refresh = 0; - int flag_force_gf_high_motion = 0; - for (mi_row = 0; mi_row < rows; mi_row++) { - for (mi_col = 0; mi_col < cols; mi_col++) { - if (flag_force_gf_high_motion == 1) { - int16_t abs_mvr = mi[0]->mv[0].as_mv.row >= 0 - ? mi[0]->mv[0].as_mv.row - : -1 * mi[0]->mv[0].as_mv.row; - int16_t abs_mvc = mi[0]->mv[0].as_mv.col >= 0 - ? mi[0]->mv[0].as_mv.col - : -1 * mi[0]->mv[0].as_mv.col; - // Calculate the motion of the background. - if (abs_mvr <= 16 && abs_mvc <= 16) { - cnt1++; - if (abs_mvr == 0 && abs_mvc == 0) cnt2++; - } - } - mi++; - // Accumulate low_content_frame. - if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++; - } - mi += 8; - } - // For video conference clips, if the background has high motion in current - // frame because of the camera movement, set this frame as the golden frame. - // Use 70% and 5% as the thresholds for golden frame refreshing. - // Also, force this frame as a golden update frame if this frame will change - // the resolution (resize_pending != 0). - if (cpi->resize_pending != 0 || - (cnt1 * 100 > (70 * rows * cols) && cnt2 * 20 < cnt1)) { - vp9_cyclic_refresh_set_golden_update(cpi); - rc->frames_till_gf_update_due = rc->baseline_gf_interval; - - if (rc->frames_till_gf_update_due > rc->frames_to_key) - rc->frames_till_gf_update_due = rc->frames_to_key; - cpi->refresh_golden_frame = 1; - force_gf_refresh = 1; - } - fraction_low = (double)low_content_frame / (rows * cols); - // Update average. - cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4; - if (!force_gf_refresh && cpi->refresh_golden_frame == 1) { - // Don't update golden reference if the amount of low_content for the - // current encoded frame is small, or if the recursive average of the - // low_content over the update interval window falls below threshold. - if (fraction_low < 0.8 || cr->low_content_avg < 0.7) - cpi->refresh_golden_frame = 0; - // Reset for next internal. - cr->low_content_avg = fraction_low; - } -} - // Update the segmentation map, and related quantities: cyclic refresh map, // refresh sb_index, and target number of blocks to be refreshed. // The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to @@ -406,7 +384,7 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { const int bl_index2 = bl_index + y * cm->mi_cols + x; // If the block is as a candidate for clean up then mark it // for possible boost/refresh (segment 1). The segment id may get - // reset to 0 later if block gets coded anything other than ZEROMV. + // reset to 0 later depending on the coding mode. if (cr->map[bl_index2] == 0) { count_tot++; if (cr->last_coded_q_map[bl_index2] > qindex_thresh || diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h index c14a647b8..9de5074d9 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -105,15 +105,15 @@ void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi, // refresh sb_index, and target number of blocks to be refreshed. void vp9_cyclic_refresh_update__map(struct VP9_COMP *const cpi); -// Update the actual number of blocks that were applied the segment delta q. +// From the just encoded frame: update the actual number of blocks that were +// applied the segment delta q, and the amount of low motion in the frame. +// Also check conditions for forcing golden update, or preventing golden +// update if the period is up. void vp9_cyclic_refresh_postencode(struct VP9_COMP *const cpi); // Set golden frame update interval, for non-svc 1 pass CBR mode. void vp9_cyclic_refresh_set_golden_update(struct VP9_COMP *const cpi); -// Check if we should not update golden reference, based on past refresh stats. -void vp9_cyclic_refresh_check_golden_update(struct VP9_COMP *const cpi); - // Set/update global/frame level refresh parameters. void vp9_cyclic_refresh_update_parameters(struct VP9_COMP *const cpi); diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index b0cbfa932..55b6a83f3 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -187,7 +187,7 @@ static uint8_t *block_start(uint8_t *framebuf, int stride, int mi_row, } static VP9_DENOISER_DECISION perform_motion_compensation( - VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs, + VP9_COMMON *const cm, VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs, int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv, int num_spatial_layers, int width) { @@ -199,6 +199,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( int i; struct buf_2d saved_dst[MAX_MB_PLANE]; struct buf_2d saved_pre[MAX_MB_PLANE]; + RefBuffer *saved_block_refs[2]; frame = ctx->best_reference_frame; saved_mi = *mi; @@ -260,6 +261,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( saved_pre[i] = filter_mbd->plane[i].pre[0]; saved_dst[i] = filter_mbd->plane[i].dst; } + saved_block_refs[0] = filter_mbd->block_refs[0]; // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser // struct. @@ -289,10 +291,12 @@ static VP9_DENOISER_DECISION perform_motion_compensation( denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col); filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride; + set_ref_ptrs(cm, filter_mbd, frame, NONE); vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs); // Restore everything to its original state *mi = saved_mi; + filter_mbd->block_refs[0] = saved_block_refs[0]; for (i = 0; i < MAX_MB_PLANE; ++i) { filter_mbd->plane[i].pre[0] = saved_pre[i]; filter_mbd->plane[i].dst = saved_dst[i]; @@ -363,7 +367,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, if (denoiser->denoising_level >= kDenLow) decision = perform_motion_compensation( - denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx, + &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx, motion_magnitude, is_skin, &zeromv_filter, consec_zeromv, cpi->svc.number_spatial_layers, cpi->Source->y_width); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f5f9fa9d9..d914243fe 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -535,7 +535,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, threshold_base = (7 * threshold_base) >> 3; } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && + if (cpi->oxcf.noise_sensitivity > 0 && cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow) threshold_base = vp9_scale_part_thresh( threshold_base, cpi->denoiser.denoising_level, content_state); @@ -3316,7 +3316,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, MODE_INFO **mi = cm->mi_grid_visible + idx_str; (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, - sb_col_in_tile - 1); + sb_col_in_tile); if (sf->adaptive_pred_interp_filter) { for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE; @@ -4094,7 +4094,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, int seg_skip = 0; (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, - sb_col_in_tile - 1); + sb_col_in_tile); x->source_variance = UINT_MAX; vp9_zero(x->pred_mv); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 5b60c2853..1dc70d2d3 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -3180,7 +3180,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } // Avoid scaling last_source unless its needed. - // Last source is needed if vp9_avg_source_sad() is used, or if + // Last source is needed if avg_source_sad() is used, or if // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise // estimation is enabled. if (cpi->unscaled_last_source != NULL && @@ -3290,13 +3290,10 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } } - // Update some stats from cyclic refresh, and check if we should not update - // golden reference, for non-SVC 1 pass CBR. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME && - !cpi->use_svc && cpi->ext_refresh_frame_flags_pending == 0 && - (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && - !cpi->oxcf.gf_cbr_boost_pct)) - vp9_cyclic_refresh_check_golden_update(cpi); + // Update some stats from cyclic refresh, and check for golden frame update. + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && + cm->frame_type != KEY_FRAME) + vp9_cyclic_refresh_postencode(cpi); // Update the skip mb flag probabilities based on the distribution // seen in the last encoder iteration. diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c index 2b2a556a6..681e960c8 100644 --- a/vp9/encoder/vp9_ethread.c +++ b/vp9/encoder/vp9_ethread.c @@ -320,7 +320,7 @@ void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c) { pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1]; pthread_mutex_lock(mutex); - while (c > row_mt_sync->cur_col[r - 1] - nsync) { + while (c > row_mt_sync->cur_col[r - 1] - nsync + 1) { pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex); } pthread_mutex_unlock(mutex); @@ -349,7 +349,7 @@ void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c, if (c < cols - 1) { cur = c; - if (c % nsync) sig = 0; + if (c % nsync != nsync - 1) sig = 0; } else { cur = cols + nsync; } diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index b51cedd89..222e27a9f 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -884,7 +884,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, const int mb_index = mb_row * cm->mb_cols + mb_col; #endif - (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c - 1); + (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c); // Adjust to the next column of MBs. x->plane[0].src.buf = cpi->Source->y_buffer + diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index e9379f418..46d626def 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -49,6 +49,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, cond_cost_list(cpi, cost_list), ref_mv, dst_mv, 0, 0); mv_sf->search_method = old_search_method; + /* restore UMV window */ + x->mv_limits = tmp_mv_limits; + // Try sub-pixel MC // if (bestsme > error_thresh && bestsme < INT_MAX) { @@ -66,9 +69,6 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); - /* restore UMV window */ - x->mv_limits = tmp_mv_limits; - return vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride); } diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 16426b28e..12dfdc2b9 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -52,6 +52,24 @@ void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { if (mv_limits->row_max > row_max) mv_limits->row_max = row_max; } +void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, + const MvLimits *umv_window_limits, + const MV *ref_mv) { + subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8, + ref_mv->col - MAX_FULL_PEL_VAL * 8); + subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8, + ref_mv->col + MAX_FULL_PEL_VAL * 8); + subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8, + ref_mv->row - MAX_FULL_PEL_VAL * 8); + subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8, + ref_mv->row + MAX_FULL_PEL_VAL * 8); + + subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min); + subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max); + subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min); + subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max); +} + int vp9_init_search_range(int size) { int sr = 0; // Minimum search size no matter what the passed in value. @@ -267,34 +285,38 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { } \ } -#define SETUP_SUBPEL_SEARCH \ - const uint8_t *const z = x->plane[0].src.buf; \ - const int src_stride = x->plane[0].src.stride; \ - const MACROBLOCKD *xd = &x->e_mbd; \ - unsigned int besterr = UINT_MAX; \ - unsigned int sse; \ - unsigned int whichdir; \ - int thismse; \ - const unsigned int halfiters = iters_per_step; \ - const unsigned int quarteriters = iters_per_step; \ - const unsigned int eighthiters = iters_per_step; \ - const int y_stride = xd->plane[0].pre[0].stride; \ - const int offset = bestmv->row * y_stride + bestmv->col; \ - const uint8_t *const y = xd->plane[0].pre[0].buf; \ - \ - int rr = ref_mv->row; \ - int rc = ref_mv->col; \ - int br = bestmv->row * 8; \ - int bc = bestmv->col * 8; \ - int hstep = 4; \ - const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX); \ - const int maxc = VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX); \ - const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX); \ - const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX); \ - int tr = br; \ - int tc = bc; \ - \ - bestmv->row *= 8; \ +#define SETUP_SUBPEL_SEARCH \ + const uint8_t *const z = x->plane[0].src.buf; \ + const int src_stride = x->plane[0].src.stride; \ + const MACROBLOCKD *xd = &x->e_mbd; \ + unsigned int besterr = UINT_MAX; \ + unsigned int sse; \ + unsigned int whichdir; \ + int thismse; \ + const unsigned int halfiters = iters_per_step; \ + const unsigned int quarteriters = iters_per_step; \ + const unsigned int eighthiters = iters_per_step; \ + const int y_stride = xd->plane[0].pre[0].stride; \ + const int offset = bestmv->row * y_stride + bestmv->col; \ + const uint8_t *const y = xd->plane[0].pre[0].buf; \ + \ + int rr = ref_mv->row; \ + int rc = ref_mv->col; \ + int br = bestmv->row * 8; \ + int bc = bestmv->col * 8; \ + int hstep = 4; \ + int minc, maxc, minr, maxr; \ + int tr = br; \ + int tc = bc; \ + MvLimits subpel_mv_limits; \ + \ + vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \ + minc = subpel_mv_limits.col_min; \ + maxc = subpel_mv_limits.col_max; \ + minr = subpel_mv_limits.row_min; \ + maxr = subpel_mv_limits.row_max; \ + \ + bestmv->row *= 8; \ bestmv->col *= 8; static unsigned int setup_center_error( @@ -395,10 +417,6 @@ uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, (void)thismse; (void)cost_list; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } @@ -464,10 +482,6 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( bestmv->row = br; bestmv->col = bc; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } @@ -528,10 +542,6 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_more( bestmv->row = br; bestmv->col = bc; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } @@ -614,10 +624,6 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned( bestmv->row = br; bestmv->col = bc; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } @@ -653,16 +659,21 @@ uint32_t vp9_find_best_sub_pixel_tree( int bc = bestmv->col * 8; int hstep = 4; int iter, round = 3 - forced_stop; - const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX); - const int maxc = VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX); - const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX); - const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX); + + int minc, maxc, minr, maxr; int tr = br; int tc = bc; const MV *search_step = search_step_table; int idx, best_idx = -1; unsigned int cost_array[5]; int kr, kc; + MvLimits subpel_mv_limits; + + vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); + minc = subpel_mv_limits.col_min; + maxc = subpel_mv_limits.col_max; + minr = subpel_mv_limits.row_min; + maxr = subpel_mv_limits.row_max; if (!(allow_hp && use_mv_hp(ref_mv))) if (round == 3) round = 2; @@ -763,10 +774,6 @@ uint32_t vp9_find_best_sub_pixel_tree( bestmv->row = br; bestmv->col = bc; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index d17b8e9bb..443b45136 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -109,6 +109,10 @@ int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int error_per_bit, int *cost_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd); +void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, + const MvLimits *umv_window_limits, + const MV *ref_mv); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 8874a5a41..e3c718d06 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -354,7 +354,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, *sse_y = sse; #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) + if (cpi->oxcf.noise_sensitivity > 0 && cpi->oxcf.speed > 5) ac_thr = vp9_scale_acskip_thresh(ac_thr, cpi->denoiser.denoising_level, (abs(sum) >> (bw + bh))); else @@ -1587,14 +1587,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var)) use_golden_nonzeromv = 0; -#if CONFIG_VP9_TEMPORAL_DENOISING - // TODO(marpan): Allowing golden as the (spatial) reference for SVC with - // denoising causes bad artifact. Remove this condition when artifact issue - // is resolved. - if (cpi->use_svc && cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode) - usable_ref_frame = LAST_FRAME; -#endif - if (cpi->oxcf.speed >= 8 && !cpi->use_svc && ((cpi->rc.frames_since_golden + 1) < x->last_sb_high_content || x->last_sb_high_content > 40)) @@ -1982,7 +1974,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode && - cpi->denoiser.denoising_level > kDenLowLow) { + cpi->denoiser.denoising_level > kDenLowLow && cpi->oxcf.speed > 5) { vp9_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx); // Keep track of zero_last cost. if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0) @@ -2036,13 +2028,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) || (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && svc_force_zero_mode[best_ref_frame - 1]); -#if CONFIG_VP9_TEMPORAL_DENOISING - // TODO(marpan): Temporary fix to keep intra prediction on as we currently - // disallow golden as the (spatial) reference for SVC with denoising due to - // artifact issue, Remove this condition when artifact issue is resolved. - if (cpi->use_svc && cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode) - perform_intra_pred = 1; -#endif inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh; } if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && @@ -2193,7 +2178,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && cpi->resize_pending == 0 && denoise_svc_pickmode && cpi->denoiser.denoising_level > kDenLowLow && - cpi->denoiser.reset == 0) { + cpi->denoiser.reset == 0 && cpi->oxcf.speed > 5) { VP9_DENOISER_DECISION decision = COPY_BLOCK; vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost, frame_mv, reuse_inter_pred, best_tx_size, @@ -2384,7 +2369,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, } vp9_set_mv_search_range(&x->mv_limits, - &mbmi_ext->ref_mvs[0]->as_mv); + &mbmi_ext->ref_mvs[ref_frame][0].as_mv); vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 4500124f9..f79b7c6fc 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -1363,10 +1363,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { RATE_CONTROL *const rc = &cpi->rc; const int qindex = cm->base_qindex; - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - vp9_cyclic_refresh_postencode(cpi); - } - // Update rate control heuristics rc->projected_frame_size = (int)(bytes_used << 3); @@ -2293,7 +2289,6 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { for (sbi_row = 0; sbi_row < sb_rows; ++sbi_row) { for (sbi_col = 0; sbi_col < sb_cols; ++sbi_col) { // Checker-board pattern, ignore boundary. - // If the use_source_sad is on, compute for every superblock. if (((sbi_row > 0 && sbi_col > 0) && (sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) && ((sbi_row % 2 == 0 && sbi_col % 2 == 0) || diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 34f6121ea..2b0307f8a 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -226,6 +226,7 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, uint32_t distortion; uint32_t sse; int cost_list[5]; + const MvLimits tmp_mv_limits = x->mv_limits; MV best_ref_mv1 = { 0, 0 }; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ @@ -246,10 +247,15 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, step_param = mv_sf->reduce_first_step_size; step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); + vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); + vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param, search_method, sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1, ref_mv, 0, 0); + /* restore UMV window */ + x->mv_limits = tmp_mv_limits; + // Ignore mv costing by sending NULL pointer instead of cost array bestsme = cpi->find_fractional_mv_step( x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, diff --git a/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c b/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c index ca3c3bee4..52f3d43e5 100644 --- a/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c +++ b/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c @@ -726,9 +726,10 @@ static void vpx_highbd_idct32_16_neon(const int32_t *const input, highbd_idct16x16_add_store(out + 16, output + 16 * stride, stride, bd); } -void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest, +void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); if (bd == 8) { int16_t temp[32 * 16]; @@ -742,16 +743,15 @@ void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest, dest += 8; } } else { - uint16_t *dst = CONVERT_TO_SHORTPTR(dest); int32_t temp[32 * 16]; int32_t *t = temp; vpx_highbd_idct32_12_neon(input, temp); vpx_highbd_idct32_12_neon(input + 32 * 8, temp + 8); for (i = 0; i < 32; i += 8) { - vpx_highbd_idct32_16_neon(t, dst, stride, bd); + vpx_highbd_idct32_16_neon(t, dest, stride, bd); t += (16 * 8); - dst += 8; + dest += 8; } } } diff --git a/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c b/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c index 6b98ad548..195dcc92d 100644 --- a/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c +++ b/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c @@ -594,9 +594,10 @@ static void vpx_highbd_idct32_8_neon(const int32_t *input, uint16_t *output, highbd_idct16x16_add_store(out + 16, output + 16 * stride, stride, bd); } -void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest, +void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest8, int stride, int bd) { int i; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); if (bd == 8) { int16_t temp[32 * 8]; @@ -610,16 +611,15 @@ void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest, dest += 8; } } else { - uint16_t *dst = CONVERT_TO_SHORTPTR(dest); int32_t temp[32 * 8]; int32_t *t = temp; vpx_highbd_idct32_6_neon(input, t); for (i = 0; i < 32; i += 8) { - vpx_highbd_idct32_8_neon(t, dst, stride, bd); + vpx_highbd_idct32_8_neon(t, dest, stride, bd); t += (8 * 8); - dst += 8; + dest += 8; } } } diff --git a/vpx_dsp/arm/idct32x32_135_add_neon.c b/vpx_dsp/arm/idct32x32_135_add_neon.c index f8be96874..b39825991 100644 --- a/vpx_dsp/arm/idct32x32_135_add_neon.c +++ b/vpx_dsp/arm/idct32x32_135_add_neon.c @@ -371,7 +371,7 @@ void vpx_idct32_12_neon(const tran_low_t *const input, int16_t *output) { vst1q_s16(output, vsubq_s16(s7[0], s6[31])); } -void vpx_idct32_16_neon(const int16_t *const input, uint8_t *const output, +void vpx_idct32_16_neon(const int16_t *const input, void *const output, const int stride, const int highbd_flag) { int16x8_t in[16], s1[32], s2[32], s3[32], s4[32], s5[32], s6[32], s7[32], out[32]; @@ -646,17 +646,17 @@ void vpx_idct32_16_neon(const int16_t *const input, uint8_t *const output, out[31] = final_sub(s7[0], s6[31]); if (highbd_flag) { - uint16_t *const outputT = CONVERT_TO_SHORTPTR(output); - highbd_add_and_store_bd8(out, outputT, stride); + highbd_add_and_store_bd8(out, output, stride); } else { + uint8_t *const outputT = (uint8_t *)output; add_and_store_u8_s16(out[0], out[1], out[2], out[3], out[4], out[5], out[6], - out[7], output, stride); + out[7], outputT, stride); add_and_store_u8_s16(out[8], out[9], out[10], out[11], out[12], out[13], - out[14], out[15], output + (8 * stride), stride); + out[14], out[15], outputT + (8 * stride), stride); add_and_store_u8_s16(out[16], out[17], out[18], out[19], out[20], out[21], - out[22], out[23], output + (16 * stride), stride); + out[22], out[23], outputT + (16 * stride), stride); add_and_store_u8_s16(out[24], out[25], out[26], out[27], out[28], out[29], - out[30], out[31], output + (24 * stride), stride); + out[30], out[31], outputT + (24 * stride), stride); } } diff --git a/vpx_dsp/arm/idct32x32_34_add_neon.c b/vpx_dsp/arm/idct32x32_34_add_neon.c index 99dd7164b..fc0c4cd84 100644 --- a/vpx_dsp/arm/idct32x32_34_add_neon.c +++ b/vpx_dsp/arm/idct32x32_34_add_neon.c @@ -265,7 +265,7 @@ void vpx_idct32_6_neon(const tran_low_t *input, int16_t *output) { vst1q_s16(output, vsubq_s16(s1[0], s2[31])); } -void vpx_idct32_8_neon(const int16_t *input, uint8_t *output, int stride, +void vpx_idct32_8_neon(const int16_t *input, void *const output, int stride, const int highbd_flag) { int16x8_t in[8], s1[32], s2[32], s3[32], out[32]; @@ -486,17 +486,17 @@ void vpx_idct32_8_neon(const int16_t *input, uint8_t *output, int stride, out[31] = final_sub(s1[0], s2[31]); if (highbd_flag) { - uint16_t *const outputT = CONVERT_TO_SHORTPTR(output); - highbd_add_and_store_bd8(out, outputT, stride); + highbd_add_and_store_bd8(out, output, stride); } else { + uint8_t *const outputT = (uint8_t *)output; add_and_store_u8_s16(out[0], out[1], out[2], out[3], out[4], out[5], out[6], - out[7], output, stride); + out[7], outputT, stride); add_and_store_u8_s16(out[8], out[9], out[10], out[11], out[12], out[13], - out[14], out[15], output + (8 * stride), stride); + out[14], out[15], outputT + (8 * stride), stride); add_and_store_u8_s16(out[16], out[17], out[18], out[19], out[20], out[21], - out[22], out[23], output + (16 * stride), stride); + out[22], out[23], outputT + (16 * stride), stride); add_and_store_u8_s16(out[24], out[25], out[26], out[27], out[28], out[29], - out[30], out[31], output + (24 * stride), stride); + out[30], out[31], outputT + (24 * stride), stride); } } diff --git a/vpx_dsp/arm/idct_neon.h b/vpx_dsp/arm/idct_neon.h index fc4558c01..27c784edc 100644 --- a/vpx_dsp/arm/idct_neon.h +++ b/vpx_dsp/arm/idct_neon.h @@ -982,11 +982,11 @@ void vpx_idct32_32_neon(const tran_low_t *input, uint8_t *dest, const int stride, const int highbd_flag); void vpx_idct32_12_neon(const tran_low_t *const input, int16_t *output); -void vpx_idct32_16_neon(const int16_t *const input, uint8_t *const output, +void vpx_idct32_16_neon(const int16_t *const input, void *const output, const int stride, const int highbd_flag); void vpx_idct32_6_neon(const tran_low_t *input, int16_t *output); -void vpx_idct32_8_neon(const int16_t *input, uint8_t *output, int stride, +void vpx_idct32_8_neon(const int16_t *input, void *const output, int stride, const int highbd_flag); #endif // VPX_DSP_ARM_IDCT_NEON_H_ |