summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- vp9/encoder/vp9_aq_cyclicrefresh.c 124
-rw-r--r-- vp9/encoder/vp9_aq_cyclicrefresh.h 8
-rw-r--r-- vp9/encoder/vp9_denoiser.c 8
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 6
-rw-r--r-- vp9/encoder/vp9_encoder.c 13
-rw-r--r-- vp9/encoder/vp9_ethread.c 4
-rw-r--r-- vp9/encoder/vp9_firstpass.c 2
-rw-r--r-- vp9/encoder/vp9_mbgraph.c 6
-rw-r--r-- vp9/encoder/vp9_mcomp.c 111
-rw-r--r-- vp9/encoder/vp9_mcomp.h 4
-rw-r--r-- vp9/encoder/vp9_pickmode.c 23
-rw-r--r-- vp9/encoder/vp9_ratectrl.c 5
-rw-r--r-- vp9/encoder/vp9_temporal_filter.c 6
-rw-r--r-- vpx_dsp/arm/highbd_idct32x32_135_add_neon.c 8
-rw-r--r-- vpx_dsp/arm/highbd_idct32x32_34_add_neon.c 8
-rw-r--r-- vpx_dsp/arm/idct32x32_135_add_neon.c 14
-rw-r--r-- vpx_dsp/arm/idct32x32_34_add_neon.c 14
-rw-r--r-- vpx_dsp/arm/idct_neon.h 4
18 files changed, 172 insertions, 196 deletions
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index b6b859268..825577746 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -240,24 +240,68 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
}
}
-// Update the actual number of blocks that were applied the segment delta q.
+// From the just encoded frame: update the actual number of blocks to which
+// the segment delta q was applied, and the amount of low motion in the frame.
+// Also check conditions for forcing golden update, or preventing golden
+// update if the period is up.
void vp9_cyclic_refresh_postencode(VP9_COMP *const cpi) {
VP9_COMMON *const cm = &cpi->common;
+ MODE_INFO **mi = cm->mi_grid_visible;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ RATE_CONTROL *const rc = &cpi->rc;
unsigned char *const seg_map = cpi->segmentation_map;
+ double fraction_low = 0.0;
+ int force_gf_refresh = 0;
+ int low_content_frame = 0;
int mi_row, mi_col;
cr->actual_num_seg1_blocks = 0;
cr->actual_num_seg2_blocks = 0;
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row++)
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
- if (cyclic_refresh_segment_id(seg_map[mi_row * cm->mi_cols + mi_col]) ==
- CR_SEGMENT_ID_BOOST1)
+ MV mv = mi[0]->mv[0].as_mv;
+ int map_index = mi_row * cm->mi_cols + mi_col;
+ if (cyclic_refresh_segment_id(seg_map[map_index]) == CR_SEGMENT_ID_BOOST1)
cr->actual_num_seg1_blocks++;
- else if (cyclic_refresh_segment_id(
- seg_map[mi_row * cm->mi_cols + mi_col]) ==
+ else if (cyclic_refresh_segment_id(seg_map[map_index]) ==
CR_SEGMENT_ID_BOOST2)
cr->actual_num_seg2_blocks++;
+ // Accumulate low_content_frame.
+ if (is_inter_block(mi[0]) && abs(mv.row) < 16 && abs(mv.col) < 16)
+ low_content_frame++;
+ mi++;
+ }
+ mi += 8;
+ }
+ // Check for golden frame update: only for non-SVC and non-golden boost.
+ if (!cpi->use_svc && cpi->ext_refresh_frame_flags_pending == 0 &&
+ !cpi->oxcf.gf_cbr_boost_pct) {
+ // Force this frame as a golden update frame if this frame changes the
+ // resolution (resize_pending != 0).
+ // TODO(marpan): check on forcing golden update if the background has very
+ // high motion in current frame.
+ if (cpi->resize_pending != 0) {
+ vp9_cyclic_refresh_set_golden_update(cpi);
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ if (rc->frames_till_gf_update_due > rc->frames_to_key)
+ rc->frames_till_gf_update_due = rc->frames_to_key;
+ cpi->refresh_golden_frame = 1;
+ force_gf_refresh = 1;
+ }
+ // Update average of low content/motion in the frame.
+ fraction_low = (double)low_content_frame / (cm->mi_rows * cm->mi_cols);
+ cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4;
+ if (!force_gf_refresh && cpi->refresh_golden_frame == 1 &&
+ rc->frames_since_key > rc->frames_since_golden + 1) {
+ // Don't update golden reference if the amount of low_content for the
+ // current encoded frame is small, or if the recursive average of the
+ // low_content over the update interval window falls below threshold.
+ if (fraction_low < 0.65 || cr->low_content_avg < 0.6) {
+ cpi->refresh_golden_frame = 0;
+ }
+ // Reset for next interval.
+ cr->low_content_avg = fraction_low;
}
+ }
}
// Set golden frame update interval, for non-svc 1 pass CBR mode.
@@ -274,72 +318,6 @@ void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) {
if (cpi->oxcf.rc_mode == VPX_VBR) rc->baseline_gf_interval = 20;
}
-// Update some encoding stats (from the just encoded frame). If this frame's
-// background has high motion, refresh the golden frame. Otherwise, if the
-// golden reference is to be updated check if we should NOT update the golden
-// ref.
-void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) {
- VP9_COMMON *const cm = &cpi->common;
- CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
- int mi_row, mi_col;
- double fraction_low = 0.0;
- int low_content_frame = 0;
- MODE_INFO **mi = cm->mi_grid_visible;
- RATE_CONTROL *const rc = &cpi->rc;
- const int rows = cm->mi_rows, cols = cm->mi_cols;
- int cnt1 = 0, cnt2 = 0;
- int force_gf_refresh = 0;
- int flag_force_gf_high_motion = 0;
- for (mi_row = 0; mi_row < rows; mi_row++) {
- for (mi_col = 0; mi_col < cols; mi_col++) {
- if (flag_force_gf_high_motion == 1) {
- int16_t abs_mvr = mi[0]->mv[0].as_mv.row >= 0
- ? mi[0]->mv[0].as_mv.row
- : -1 * mi[0]->mv[0].as_mv.row;
- int16_t abs_mvc = mi[0]->mv[0].as_mv.col >= 0
- ? mi[0]->mv[0].as_mv.col
- : -1 * mi[0]->mv[0].as_mv.col;
- // Calculate the motion of the background.
- if (abs_mvr <= 16 && abs_mvc <= 16) {
- cnt1++;
- if (abs_mvr == 0 && abs_mvc == 0) cnt2++;
- }
- }
- mi++;
- // Accumulate low_content_frame.
- if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++;
- }
- mi += 8;
- }
- // For video conference clips, if the background has high motion in current
- // frame because of the camera movement, set this frame as the golden frame.
- // Use 70% and 5% as the thresholds for golden frame refreshing.
- // Also, force this frame as a golden update frame if this frame will change
- // the resolution (resize_pending != 0).
- if (cpi->resize_pending != 0 ||
- (cnt1 * 100 > (70 * rows * cols) && cnt2 * 20 < cnt1)) {
- vp9_cyclic_refresh_set_golden_update(cpi);
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
-
- if (rc->frames_till_gf_update_due > rc->frames_to_key)
- rc->frames_till_gf_update_due = rc->frames_to_key;
- cpi->refresh_golden_frame = 1;
- force_gf_refresh = 1;
- }
- fraction_low = (double)low_content_frame / (rows * cols);
- // Update average.
- cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4;
- if (!force_gf_refresh && cpi->refresh_golden_frame == 1) {
- // Don't update golden reference if the amount of low_content for the
- // current encoded frame is small, or if the recursive average of the
- // low_content over the update interval window falls below threshold.
- if (fraction_low < 0.8 || cr->low_content_avg < 0.7)
- cpi->refresh_golden_frame = 0;
- // Reset for next internal.
- cr->low_content_avg = fraction_low;
- }
-}
-
// Update the segmentation map, and related quantities: cyclic refresh map,
// refresh sb_index, and target number of blocks to be refreshed.
// The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to
@@ -406,7 +384,7 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
const int bl_index2 = bl_index + y * cm->mi_cols + x;
// If the block is as a candidate for clean up then mark it
// for possible boost/refresh (segment 1). The segment id may get
- // reset to 0 later if block gets coded anything other than ZEROMV.
+ // reset to 0 later depending on the coding mode.
if (cr->map[bl_index2] == 0) {
count_tot++;
if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.h b/vp9/encoder/vp9_aq_cyclicrefresh.h
index c14a647b8..9de5074d9 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -105,15 +105,15 @@ void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi,
// refresh sb_index, and target number of blocks to be refreshed.
void vp9_cyclic_refresh_update__map(struct VP9_COMP *const cpi);
-// Update the actual number of blocks that were applied the segment delta q.
+// From the just encoded frame: update the actual number of blocks to which
+// the segment delta q was applied, and the amount of low motion in the frame.
+// Also check conditions for forcing golden update, or preventing golden
+// update if the period is up.
void vp9_cyclic_refresh_postencode(struct VP9_COMP *const cpi);
// Set golden frame update interval, for non-svc 1 pass CBR mode.
void vp9_cyclic_refresh_set_golden_update(struct VP9_COMP *const cpi);
-// Check if we should not update golden reference, based on past refresh stats.
-void vp9_cyclic_refresh_check_golden_update(struct VP9_COMP *const cpi);
-
// Set/update global/frame level refresh parameters.
void vp9_cyclic_refresh_update_parameters(struct VP9_COMP *const cpi);
diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c
index b0cbfa932..55b6a83f3 100644
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -187,7 +187,7 @@ static uint8_t *block_start(uint8_t *framebuf, int stride, int mi_row,
}
static VP9_DENOISER_DECISION perform_motion_compensation(
- VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs,
+ VP9_COMMON *const cm, VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs,
int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,
int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv,
int num_spatial_layers, int width) {
@@ -199,6 +199,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
int i;
struct buf_2d saved_dst[MAX_MB_PLANE];
struct buf_2d saved_pre[MAX_MB_PLANE];
+ RefBuffer *saved_block_refs[2];
frame = ctx->best_reference_frame;
saved_mi = *mi;
@@ -260,6 +261,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
saved_pre[i] = filter_mbd->plane[i].pre[0];
saved_dst[i] = filter_mbd->plane[i].dst;
}
+ saved_block_refs[0] = filter_mbd->block_refs[0];
// Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser
// struct.
@@ -289,10 +291,12 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
+ set_ref_ptrs(cm, filter_mbd, frame, NONE);
vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs);
// Restore everything to its original state
*mi = saved_mi;
+ filter_mbd->block_refs[0] = saved_block_refs[0];
for (i = 0; i < MAX_MB_PLANE; ++i) {
filter_mbd->plane[i].pre[0] = saved_pre[i];
filter_mbd->plane[i].dst = saved_dst[i];
@@ -363,7 +367,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
if (denoiser->denoising_level >= kDenLow)
decision = perform_motion_compensation(
- denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
+ &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,
cpi->svc.number_spatial_layers, cpi->Source->y_width);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f5f9fa9d9..d914243fe 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -535,7 +535,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
threshold_base = (7 * threshold_base) >> 3;
}
#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0 &&
+ if (cpi->oxcf.noise_sensitivity > 0 && cpi->oxcf.speed > 5 &&
cpi->denoiser.denoising_level >= kDenLow)
threshold_base = vp9_scale_part_thresh(
threshold_base, cpi->denoiser.denoising_level, content_state);
@@ -3316,7 +3316,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
MODE_INFO **mi = cm->mi_grid_visible + idx_str;
(*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
- sb_col_in_tile - 1);
+ sb_col_in_tile);
if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE;
@@ -4094,7 +4094,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
int seg_skip = 0;
(*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
- sb_col_in_tile - 1);
+ sb_col_in_tile);
x->source_variance = UINT_MAX;
vp9_zero(x->pred_mv);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 5b60c2853..1dc70d2d3 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3180,7 +3180,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
}
// Avoid scaling last_source unless its needed.
- // Last source is needed if vp9_avg_source_sad() is used, or if
+ // Last source is needed if avg_source_sad() is used, or if
// partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
// estimation is enabled.
if (cpi->unscaled_last_source != NULL &&
@@ -3290,13 +3290,10 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
}
}
- // Update some stats from cyclic refresh, and check if we should not update
- // golden reference, for non-SVC 1 pass CBR.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME &&
- !cpi->use_svc && cpi->ext_refresh_frame_flags_pending == 0 &&
- (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
- !cpi->oxcf.gf_cbr_boost_pct))
- vp9_cyclic_refresh_check_golden_update(cpi);
+ // Update some stats from cyclic refresh, and check for golden frame update.
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
+ cm->frame_type != KEY_FRAME)
+ vp9_cyclic_refresh_postencode(cpi);
// Update the skip mb flag probabilities based on the distribution
// seen in the last encoder iteration.
diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c
index 2b2a556a6..681e960c8 100644
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -320,7 +320,7 @@ void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c) {
pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1];
pthread_mutex_lock(mutex);
- while (c > row_mt_sync->cur_col[r - 1] - nsync) {
+ while (c > row_mt_sync->cur_col[r - 1] - nsync + 1) {
pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex);
}
pthread_mutex_unlock(mutex);
@@ -349,7 +349,7 @@ void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c,
if (c < cols - 1) {
cur = c;
- if (c % nsync) sig = 0;
+ if (c % nsync != nsync - 1) sig = 0;
} else {
cur = cols + nsync;
}
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index b51cedd89..222e27a9f 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -884,7 +884,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
const int mb_index = mb_row * cm->mb_cols + mb_col;
#endif
- (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c - 1);
+ (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c);
// Adjust to the next column of MBs.
x->plane[0].src.buf = cpi->Source->y_buffer +
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index e9379f418..46d626def 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -49,6 +49,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv,
cond_cost_list(cpi, cost_list), ref_mv, dst_mv, 0, 0);
mv_sf->search_method = old_search_method;
+ /* restore UMV window */
+ x->mv_limits = tmp_mv_limits;
+
// Try sub-pixel MC
// if (bestsme > error_thresh && bestsme < INT_MAX)
{
@@ -66,9 +69,6 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv,
vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);
- /* restore UMV window */
- x->mv_limits = tmp_mv_limits;
-
return vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
xd->plane[0].dst.buf, xd->plane[0].dst.stride);
}
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 16426b28e..12dfdc2b9 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -52,6 +52,24 @@ void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
}
+void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
+ const MvLimits *umv_window_limits,
+ const MV *ref_mv) {
+ subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8,
+ ref_mv->col - MAX_FULL_PEL_VAL * 8);
+ subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8,
+ ref_mv->col + MAX_FULL_PEL_VAL * 8);
+ subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8,
+ ref_mv->row - MAX_FULL_PEL_VAL * 8);
+ subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8,
+ ref_mv->row + MAX_FULL_PEL_VAL * 8);
+
+ subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min);
+ subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max);
+ subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min);
+ subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max);
+}
+
int vp9_init_search_range(int size) {
int sr = 0;
// Minimum search size no matter what the passed in value.
@@ -267,34 +285,38 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
} \
}
-#define SETUP_SUBPEL_SEARCH \
- const uint8_t *const z = x->plane[0].src.buf; \
- const int src_stride = x->plane[0].src.stride; \
- const MACROBLOCKD *xd = &x->e_mbd; \
- unsigned int besterr = UINT_MAX; \
- unsigned int sse; \
- unsigned int whichdir; \
- int thismse; \
- const unsigned int halfiters = iters_per_step; \
- const unsigned int quarteriters = iters_per_step; \
- const unsigned int eighthiters = iters_per_step; \
- const int y_stride = xd->plane[0].pre[0].stride; \
- const int offset = bestmv->row * y_stride + bestmv->col; \
- const uint8_t *const y = xd->plane[0].pre[0].buf; \
- \
- int rr = ref_mv->row; \
- int rc = ref_mv->col; \
- int br = bestmv->row * 8; \
- int bc = bestmv->col * 8; \
- int hstep = 4; \
- const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX); \
- const int maxc = VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX); \
- const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX); \
- const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX); \
- int tr = br; \
- int tc = bc; \
- \
- bestmv->row *= 8; \
+#define SETUP_SUBPEL_SEARCH \
+ const uint8_t *const z = x->plane[0].src.buf; \
+ const int src_stride = x->plane[0].src.stride; \
+ const MACROBLOCKD *xd = &x->e_mbd; \
+ unsigned int besterr = UINT_MAX; \
+ unsigned int sse; \
+ unsigned int whichdir; \
+ int thismse; \
+ const unsigned int halfiters = iters_per_step; \
+ const unsigned int quarteriters = iters_per_step; \
+ const unsigned int eighthiters = iters_per_step; \
+ const int y_stride = xd->plane[0].pre[0].stride; \
+ const int offset = bestmv->row * y_stride + bestmv->col; \
+ const uint8_t *const y = xd->plane[0].pre[0].buf; \
+ \
+ int rr = ref_mv->row; \
+ int rc = ref_mv->col; \
+ int br = bestmv->row * 8; \
+ int bc = bestmv->col * 8; \
+ int hstep = 4; \
+ int minc, maxc, minr, maxr; \
+ int tr = br; \
+ int tc = bc; \
+ MvLimits subpel_mv_limits; \
+ \
+ vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \
+ minc = subpel_mv_limits.col_min; \
+ maxc = subpel_mv_limits.col_max; \
+ minr = subpel_mv_limits.row_min; \
+ maxr = subpel_mv_limits.row_max; \
+ \
+ bestmv->row *= 8; \
bestmv->col *= 8;
static unsigned int setup_center_error(
@@ -395,10 +417,6 @@ uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
(void)thismse;
(void)cost_list;
- if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
- return UINT_MAX;
-
return besterr;
}
@@ -464,10 +482,6 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
bestmv->row = br;
bestmv->col = bc;
- if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
- return UINT_MAX;
-
return besterr;
}
@@ -528,10 +542,6 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
bestmv->row = br;
bestmv->col = bc;
- if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
- return UINT_MAX;
-
return besterr;
}
@@ -614,10 +624,6 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned(
bestmv->row = br;
bestmv->col = bc;
- if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
- return UINT_MAX;
-
return besterr;
}
@@ -653,16 +659,21 @@ uint32_t vp9_find_best_sub_pixel_tree(
int bc = bestmv->col * 8;
int hstep = 4;
int iter, round = 3 - forced_stop;
- const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX);
- const int maxc = VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX);
- const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX);
- const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX);
+
+ int minc, maxc, minr, maxr;
int tr = br;
int tc = bc;
const MV *search_step = search_step_table;
int idx, best_idx = -1;
unsigned int cost_array[5];
int kr, kc;
+ MvLimits subpel_mv_limits;
+
+ vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
+ minc = subpel_mv_limits.col_min;
+ maxc = subpel_mv_limits.col_max;
+ minr = subpel_mv_limits.row_min;
+ maxr = subpel_mv_limits.row_max;
if (!(allow_hp && use_mv_hp(ref_mv)))
if (round == 3) round = 2;
@@ -763,10 +774,6 @@ uint32_t vp9_find_best_sub_pixel_tree(
bestmv->row = br;
bestmv->col = bc;
- if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
- (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
- return UINT_MAX;
-
return besterr;
}
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index d17b8e9bb..443b45136 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -109,6 +109,10 @@ int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int error_per_bit, int *cost_list, const MV *ref_mv,
MV *tmp_mv, int var_max, int rd);
+void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
+ const MvLimits *umv_window_limits,
+ const MV *ref_mv);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 8874a5a41..e3c718d06 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -354,7 +354,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
*sse_y = sse;
#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0)
+ if (cpi->oxcf.noise_sensitivity > 0 && cpi->oxcf.speed > 5)
ac_thr = vp9_scale_acskip_thresh(ac_thr, cpi->denoiser.denoising_level,
(abs(sum) >> (bw + bh)));
else
@@ -1587,14 +1587,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
!svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
use_golden_nonzeromv = 0;
-#if CONFIG_VP9_TEMPORAL_DENOISING
- // TODO(marpan): Allowing golden as the (spatial) reference for SVC with
- // denoising causes bad artifact. Remove this condition when artifact issue
- // is resolved.
- if (cpi->use_svc && cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode)
- usable_ref_frame = LAST_FRAME;
-#endif
-
if (cpi->oxcf.speed >= 8 && !cpi->use_svc &&
((cpi->rc.frames_since_golden + 1) < x->last_sb_high_content ||
x->last_sb_high_content > 40))
@@ -1982,7 +1974,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode &&
- cpi->denoiser.denoising_level > kDenLowLow) {
+ cpi->denoiser.denoising_level > kDenLowLow && cpi->oxcf.speed > 5) {
vp9_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx);
// Keep track of zero_last cost.
if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0)
@@ -2036,13 +2028,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
!(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
(!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
svc_force_zero_mode[best_ref_frame - 1]);
-#if CONFIG_VP9_TEMPORAL_DENOISING
- // TODO(marpan): Temporary fix to keep intra prediction on as we currently
- // disallow golden as the (spatial) reference for SVC with denoising due to
- // artifact issue, Remove this condition when artifact issue is resolved.
- if (cpi->use_svc && cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode)
- perform_intra_pred = 1;
-#endif
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
@@ -2193,7 +2178,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && cpi->resize_pending == 0 &&
denoise_svc_pickmode && cpi->denoiser.denoising_level > kDenLowLow &&
- cpi->denoiser.reset == 0) {
+ cpi->denoiser.reset == 0 && cpi->oxcf.speed > 5) {
VP9_DENOISER_DECISION decision = COPY_BLOCK;
vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost,
frame_mv, reuse_inter_pred, best_tx_size,
@@ -2384,7 +2369,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row,
}
vp9_set_mv_search_range(&x->mv_limits,
- &mbmi_ext->ref_mvs[0]->as_mv);
+ &mbmi_ext->ref_mvs[ref_frame][0].as_mv);
vp9_full_pixel_search(
cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method,
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 4500124f9..f79b7c6fc 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -1363,10 +1363,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
RATE_CONTROL *const rc = &cpi->rc;
const int qindex = cm->base_qindex;
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
- vp9_cyclic_refresh_postencode(cpi);
- }
-
// Update rate control heuristics
rc->projected_frame_size = (int)(bytes_used << 3);
@@ -2293,7 +2289,6 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
for (sbi_row = 0; sbi_row < sb_rows; ++sbi_row) {
for (sbi_col = 0; sbi_col < sb_cols; ++sbi_col) {
// Checker-board pattern, ignore boundary.
- // If the use_source_sad is on, compute for every superblock.
if (((sbi_row > 0 && sbi_col > 0) &&
(sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) &&
((sbi_row % 2 == 0 && sbi_col % 2 == 0) ||
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 34f6121ea..2b0307f8a 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -226,6 +226,7 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
uint32_t distortion;
uint32_t sse;
int cost_list[5];
+ const MvLimits tmp_mv_limits = x->mv_limits;
MV best_ref_mv1 = { 0, 0 };
MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
@@ -246,10 +247,15 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
step_param = mv_sf->reduce_first_step_size;
step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
+ vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
+
vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
search_method, sadpb, cond_cost_list(cpi, cost_list),
&best_ref_mv1, ref_mv, 0, 0);
+ /* restore UMV window */
+ x->mv_limits = tmp_mv_limits;
+
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(
x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv,
diff --git a/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c b/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c
index ca3c3bee4..52f3d43e5 100644
--- a/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c
+++ b/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c
@@ -726,9 +726,10 @@ static void vpx_highbd_idct32_16_neon(const int32_t *const input,
highbd_idct16x16_add_store(out + 16, output + 16 * stride, stride, bd);
}
-void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest,
+void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
if (bd == 8) {
int16_t temp[32 * 16];
@@ -742,16 +743,15 @@ void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest,
dest += 8;
}
} else {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
int32_t temp[32 * 16];
int32_t *t = temp;
vpx_highbd_idct32_12_neon(input, temp);
vpx_highbd_idct32_12_neon(input + 32 * 8, temp + 8);
for (i = 0; i < 32; i += 8) {
- vpx_highbd_idct32_16_neon(t, dst, stride, bd);
+ vpx_highbd_idct32_16_neon(t, dest, stride, bd);
t += (16 * 8);
- dst += 8;
+ dest += 8;
}
}
}
diff --git a/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c b/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c
index 6b98ad548..195dcc92d 100644
--- a/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c
+++ b/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c
@@ -594,9 +594,10 @@ static void vpx_highbd_idct32_8_neon(const int32_t *input, uint16_t *output,
highbd_idct16x16_add_store(out + 16, output + 16 * stride, stride, bd);
}
-void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest,
+void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
int i;
+ uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
if (bd == 8) {
int16_t temp[32 * 8];
@@ -610,16 +611,15 @@ void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest,
dest += 8;
}
} else {
- uint16_t *dst = CONVERT_TO_SHORTPTR(dest);
int32_t temp[32 * 8];
int32_t *t = temp;
vpx_highbd_idct32_6_neon(input, t);
for (i = 0; i < 32; i += 8) {
- vpx_highbd_idct32_8_neon(t, dst, stride, bd);
+ vpx_highbd_idct32_8_neon(t, dest, stride, bd);
t += (8 * 8);
- dst += 8;
+ dest += 8;
}
}
}
diff --git a/vpx_dsp/arm/idct32x32_135_add_neon.c b/vpx_dsp/arm/idct32x32_135_add_neon.c
index f8be96874..b39825991 100644
--- a/vpx_dsp/arm/idct32x32_135_add_neon.c
+++ b/vpx_dsp/arm/idct32x32_135_add_neon.c
@@ -371,7 +371,7 @@ void vpx_idct32_12_neon(const tran_low_t *const input, int16_t *output) {
vst1q_s16(output, vsubq_s16(s7[0], s6[31]));
}
-void vpx_idct32_16_neon(const int16_t *const input, uint8_t *const output,
+void vpx_idct32_16_neon(const int16_t *const input, void *const output,
const int stride, const int highbd_flag) {
int16x8_t in[16], s1[32], s2[32], s3[32], s4[32], s5[32], s6[32], s7[32],
out[32];
@@ -646,17 +646,17 @@ void vpx_idct32_16_neon(const int16_t *const input, uint8_t *const output,
out[31] = final_sub(s7[0], s6[31]);
if (highbd_flag) {
- uint16_t *const outputT = CONVERT_TO_SHORTPTR(output);
- highbd_add_and_store_bd8(out, outputT, stride);
+ highbd_add_and_store_bd8(out, output, stride);
} else {
+ uint8_t *const outputT = (uint8_t *)output;
add_and_store_u8_s16(out[0], out[1], out[2], out[3], out[4], out[5], out[6],
- out[7], output, stride);
+ out[7], outputT, stride);
add_and_store_u8_s16(out[8], out[9], out[10], out[11], out[12], out[13],
- out[14], out[15], output + (8 * stride), stride);
+ out[14], out[15], outputT + (8 * stride), stride);
add_and_store_u8_s16(out[16], out[17], out[18], out[19], out[20], out[21],
- out[22], out[23], output + (16 * stride), stride);
+ out[22], out[23], outputT + (16 * stride), stride);
add_and_store_u8_s16(out[24], out[25], out[26], out[27], out[28], out[29],
- out[30], out[31], output + (24 * stride), stride);
+ out[30], out[31], outputT + (24 * stride), stride);
}
}
diff --git a/vpx_dsp/arm/idct32x32_34_add_neon.c b/vpx_dsp/arm/idct32x32_34_add_neon.c
index 99dd7164b..fc0c4cd84 100644
--- a/vpx_dsp/arm/idct32x32_34_add_neon.c
+++ b/vpx_dsp/arm/idct32x32_34_add_neon.c
@@ -265,7 +265,7 @@ void vpx_idct32_6_neon(const tran_low_t *input, int16_t *output) {
vst1q_s16(output, vsubq_s16(s1[0], s2[31]));
}
-void vpx_idct32_8_neon(const int16_t *input, uint8_t *output, int stride,
+void vpx_idct32_8_neon(const int16_t *input, void *const output, int stride,
const int highbd_flag) {
int16x8_t in[8], s1[32], s2[32], s3[32], out[32];
@@ -486,17 +486,17 @@ void vpx_idct32_8_neon(const int16_t *input, uint8_t *output, int stride,
out[31] = final_sub(s1[0], s2[31]);
if (highbd_flag) {
- uint16_t *const outputT = CONVERT_TO_SHORTPTR(output);
- highbd_add_and_store_bd8(out, outputT, stride);
+ highbd_add_and_store_bd8(out, output, stride);
} else {
+ uint8_t *const outputT = (uint8_t *)output;
add_and_store_u8_s16(out[0], out[1], out[2], out[3], out[4], out[5], out[6],
- out[7], output, stride);
+ out[7], outputT, stride);
add_and_store_u8_s16(out[8], out[9], out[10], out[11], out[12], out[13],
- out[14], out[15], output + (8 * stride), stride);
+ out[14], out[15], outputT + (8 * stride), stride);
add_and_store_u8_s16(out[16], out[17], out[18], out[19], out[20], out[21],
- out[22], out[23], output + (16 * stride), stride);
+ out[22], out[23], outputT + (16 * stride), stride);
add_and_store_u8_s16(out[24], out[25], out[26], out[27], out[28], out[29],
- out[30], out[31], output + (24 * stride), stride);
+ out[30], out[31], outputT + (24 * stride), stride);
}
}
diff --git a/vpx_dsp/arm/idct_neon.h b/vpx_dsp/arm/idct_neon.h
index fc4558c01..27c784edc 100644
--- a/vpx_dsp/arm/idct_neon.h
+++ b/vpx_dsp/arm/idct_neon.h
@@ -982,11 +982,11 @@ void vpx_idct32_32_neon(const tran_low_t *input, uint8_t *dest,
const int stride, const int highbd_flag);
void vpx_idct32_12_neon(const tran_low_t *const input, int16_t *output);
-void vpx_idct32_16_neon(const int16_t *const input, uint8_t *const output,
+void vpx_idct32_16_neon(const int16_t *const input, void *const output,
const int stride, const int highbd_flag);
void vpx_idct32_6_neon(const tran_low_t *input, int16_t *output);
-void vpx_idct32_8_neon(const int16_t *input, uint8_t *output, int stride,
+void vpx_idct32_8_neon(const int16_t *input, void *const output, int stride,
const int highbd_flag);
#endif // VPX_DSP_ARM_IDCT_NEON_H_