summary refs log tree commit diff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/encoder/vp9_aq_cyclicrefresh.c 3
-rw-r--r-- vp9/encoder/vp9_block.h 2
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 17
-rw-r--r-- vp9/encoder/vp9_firstpass.c 73
-rw-r--r-- vp9/encoder/vp9_onyx_if.c 126
-rw-r--r-- vp9/encoder/vp9_ratectrl.c 3
-rw-r--r-- vp9/encoder/vp9_ratectrl.h 5
-rw-r--r-- vp9/encoder/vp9_rdopt.c 117
-rw-r--r-- vp9/encoder/vp9_rdopt.h 4
-rw-r--r-- vp9/encoder/vp9_temporal_filter.c 90
-rw-r--r-- vp9/encoder/vp9_variance.c 343
-rw-r--r-- vp9/encoder/vp9_variance.h 12
12 files changed, 366 insertions, 429 deletions
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 323c10350..d1437d377 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -136,7 +136,8 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
const int xmis = MIN(cm->mi_cols - mi_col, bw);
const int ymis = MIN(cm->mi_rows - mi_row, bh);
const int block_index = mi_row * cm->mi_cols + mi_col;
- const int refresh_this_block = candidate_refresh_aq(cr, mbmi, bsize, use_rd);
+ const int refresh_this_block = cpi->mb.in_static_area ||
+ candidate_refresh_aq(cr, mbmi, bsize, use_rd);
// Default is to not update the refresh map.
int new_map_value = cr->map[block_index];
int x = 0; int y = 0;
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index cad124125..fcf2a0420 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -162,6 +162,8 @@ struct macroblock {
// note that token_costs is the cost when eob node is skipped
vp9_coeff_cost token_costs[TX_SIZES];
+ int in_static_area;
+
int optimize;
// indicate if it is in the rd search loop or encoding process
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f88ce2d1a..9956acc0b 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1273,6 +1273,7 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, int mi_col) {
+ MACROBLOCK *x = &cpi->mb;
uint8_t *src, *pre;
int src_stride, pre_stride;
@@ -1304,7 +1305,8 @@ static int is_background(VP9_COMP *cpi, const TileInfo *const tile,
threshold = (row8x8_remaining * col8x8_remaining) << 6;
}
- return (this_sad < 2 * threshold);
+ x->in_static_area = (this_sad < 2 * threshold);
+ return x->in_static_area;
}
static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
@@ -2433,6 +2435,11 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
set_offsets(cpi, tile, mi_row, mi_col, bsize);
xd->mi[0]->mbmi.sb_type = bsize;
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
+ if (xd->mi[0]->mbmi.segment_id && x->in_static_area)
+ x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
+ }
+
if (!frame_is_intra_only(cm)) {
vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col,
rate, dist, bsize);
@@ -2856,7 +2863,8 @@ static void nonrd_use_partition(VP9_COMP *cpi,
static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, TOKENEXTRA **tp) {
VP9_COMMON *cm = &cpi->common;
- MACROBLOCKD *xd = &cpi->mb.e_mbd;
+ MACROBLOCK *x = &cpi->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
int mi_col;
// Initialize the left context for the new SB row
@@ -2874,8 +2882,9 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
BLOCK_SIZE bsize;
- cpi->mb.source_variance = UINT_MAX;
- vp9_zero(cpi->mb.pred_mv);
+ x->in_static_area = 0;
+ x->source_variance = UINT_MAX;
+ vp9_zero(x->pred_mv);
// Set the partition type of the 64X64 block
switch (cpi->sf.partition_search_type) {
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index f9dba9689..1dacfaaae 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -61,7 +61,7 @@
#define MIN_GF_INTERVAL 4
#endif
-#define DISABLE_RC_LONG_TERM_MEM
+#define LONG_TERM_VBR_CORRECTION
static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) {
YV12_BUFFER_CONFIG temp = *a;
@@ -1033,6 +1033,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
reset_fpf_position(twopass, start_pos);
}
+
+ // Reset the vbr bits off target counter
+ cpi->rc.vbr_bits_off_target = 0;
}
// This function gives an estimate of how badly we believe the prediction
@@ -2192,6 +2195,23 @@ void vp9_rc_get_first_pass_params(VP9_COMP *cpi) {
cpi->rc.frames_to_key = INT_MAX;
}
+// For VBR...adjustment to the frame target based on error from previous frames
+void vbr_rate_correction(int * this_frame_target,
+ const int64_t vbr_bits_off_target) {
+ int max_delta = (*this_frame_target * 15) / 100;
+
+ // vbr_bits_off_target > 0 means we have extra bits to spend
+ if (vbr_bits_off_target > 0) {
+ *this_frame_target +=
+ (vbr_bits_off_target > max_delta) ? max_delta
+ : (int)vbr_bits_off_target;
+ } else {
+ *this_frame_target -=
+ (vbr_bits_off_target < -max_delta) ? max_delta
+ : (int)-vbr_bits_off_target;
+ }
+}
+
void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2219,8 +2239,15 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
return;
if (cpi->refresh_alt_ref_frame) {
+ int modified_target = twopass->gf_bits;
+ rc->base_frame_target = twopass->gf_bits;
cm->frame_type = INTER_FRAME;
- vp9_rc_set_frame_target(cpi, twopass->gf_bits);
+#ifdef LONG_TERM_VBR_CORRECTION
+ // Correction to rate target based on prior over or under shoot.
+ if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
+ vbr_rate_correction(&modified_target, rc->vbr_bits_off_target);
+#endif
+ vp9_rc_set_frame_target(cpi, modified_target);
return;
}
@@ -2315,6 +2342,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
target = vp9_rc_clamp_iframe_target_size(cpi, rc->this_frame_target);
else
target = vp9_rc_clamp_pframe_target_size(cpi, rc->this_frame_target);
+
+ rc->base_frame_target = target;
+#ifdef LONG_TERM_VBR_CORRECTION
+ // Correction to rate target based on prior over or under shoot.
+ if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
+ vbr_rate_correction(&target, rc->vbr_bits_off_target);
+#endif
vp9_rc_set_frame_target(cpi, target);
// Update the total stats remaining structure.
@@ -2322,20 +2356,45 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
}
void vp9_twopass_postencode_update(VP9_COMP *cpi) {
-#ifdef DISABLE_RC_LONG_TERM_MEM
- const uint64_t bits_used = cpi->rc.this_frame_target;
+ RATE_CONTROL *const rc = &cpi->rc;
+#ifdef LONG_TERM_VBR_CORRECTION
+ // In this experimental mode, the VBR correction is done exclusively through
+ // rc->vbr_bits_off_target. Based on the sign of this value, a limited %
+ // adjustment is made to the target rate of subsequent frames, to try and
+ // push it back towards 0. This mode is less likely to suffer from
+ // extreme behaviour at the end of a clip or group of frames.
+ const int bits_used = rc->base_frame_target;
+ rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size;
#else
- const uint64_t bits_used = cpi->rc.projected_frame_size;
+ // In this mode, VBR correction is achieved by altering bits_left,
+ // kf_group_bits & gf_group_bits to reflect any deviation from the target
+ // rate in this frame. This alters the allocation of bits to the
+ // remaining frames in the group / clip.
+ //
+ // This method can give rise to unstable behaviour near the end of a clip
+ // or kf/gf group of frames where any accumulated error is corrected over an
+ // ever decreasing number of frames. Hence we change the balance of target
+ // vs. actual bitrate gradually as we progress towards the end of the
+ // sequence in order to mitigate this effect.
+ const double progress =
+ (double)(cpi->twopass.stats_in - cpi->twopass.stats_in_start) /
+ (cpi->twopass.stats_in_end - cpi->twopass.stats_in_start);
+ const int bits_used = progress * cpi->rc.this_frame_target +
+ (1.0 - progress) * cpi->rc.projected_frame_size;
#endif
+
cpi->twopass.bits_left -= bits_used;
cpi->twopass.bits_left = MAX(cpi->twopass.bits_left, 0);
- // Update bits left to the kf and gf groups to account for overshoot or
- // undershoot on these frames.
+
+#ifdef LONG_TERM_VBR_CORRECTION
+ if (cpi->common.frame_type != KEY_FRAME) {
+#else
if (cpi->common.frame_type == KEY_FRAME) {
// For key frames kf_group_bits already had the target bits subtracted out.
// So now update to the correct value based on the actual bits used.
cpi->twopass.kf_group_bits += cpi->rc.this_frame_target - bits_used;
} else {
+#endif
cpi->twopass.kf_group_bits -= bits_used;
cpi->twopass.gf_group_bits -= bits_used;
cpi->twopass.gf_group_bits = MAX(cpi->twopass.gf_group_bits, 0);
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 1aa250e8b..23a68729b 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -373,122 +373,6 @@ static void update_reference_segmentation_map(VP9_COMP *cpi) {
}
}
-static void set_rd_speed_thresholds(VP9_COMP *cpi) {
- int i;
- RD_OPT *const rd = &cpi->rd;
-
- // Set baseline threshold values
- for (i = 0; i < MAX_MODES; ++i)
- rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
-
- rd->thresh_mult[THR_NEARESTMV] = 0;
- rd->thresh_mult[THR_NEARESTG] = 0;
- rd->thresh_mult[THR_NEARESTA] = 0;
-
- rd->thresh_mult[THR_DC] += 1000;
-
- rd->thresh_mult[THR_NEWMV] += 1000;
- rd->thresh_mult[THR_NEWA] += 1000;
- rd->thresh_mult[THR_NEWG] += 1000;
-
- rd->thresh_mult[THR_NEARMV] += 1000;
- rd->thresh_mult[THR_NEARA] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
- rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
-
- rd->thresh_mult[THR_TM] += 1000;
-
- rd->thresh_mult[THR_COMP_NEARLA] += 1500;
- rd->thresh_mult[THR_COMP_NEWLA] += 2000;
- rd->thresh_mult[THR_NEARG] += 1000;
- rd->thresh_mult[THR_COMP_NEARGA] += 1500;
- rd->thresh_mult[THR_COMP_NEWGA] += 2000;
-
- rd->thresh_mult[THR_ZEROMV] += 2000;
- rd->thresh_mult[THR_ZEROG] += 2000;
- rd->thresh_mult[THR_ZEROA] += 2000;
- rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
- rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
-
- rd->thresh_mult[THR_H_PRED] += 2000;
- rd->thresh_mult[THR_V_PRED] += 2000;
- rd->thresh_mult[THR_D45_PRED ] += 2500;
- rd->thresh_mult[THR_D135_PRED] += 2500;
- rd->thresh_mult[THR_D117_PRED] += 2500;
- rd->thresh_mult[THR_D153_PRED] += 2500;
- rd->thresh_mult[THR_D207_PRED] += 2500;
- rd->thresh_mult[THR_D63_PRED] += 2500;
-
- /* disable frame modes if flags not set */
- if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
- rd->thresh_mult[THR_NEWMV ] = INT_MAX;
- rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
- rd->thresh_mult[THR_ZEROMV ] = INT_MAX;
- rd->thresh_mult[THR_NEARMV ] = INT_MAX;
- }
- if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
- rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
- rd->thresh_mult[THR_ZEROG ] = INT_MAX;
- rd->thresh_mult[THR_NEARG ] = INT_MAX;
- rd->thresh_mult[THR_NEWG ] = INT_MAX;
- }
- if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
- rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
- rd->thresh_mult[THR_ZEROA ] = INT_MAX;
- rd->thresh_mult[THR_NEARA ] = INT_MAX;
- rd->thresh_mult[THR_NEWA ] = INT_MAX;
- }
-
- if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
- (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
- rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
- }
- if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
- (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
- rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
- rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
- }
-}
-
-static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
- const SPEED_FEATURES *const sf = &cpi->sf;
- RD_OPT *const rd = &cpi->rd;
- int i;
-
- for (i = 0; i < MAX_REFS; ++i)
- rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
-
- rd->thresh_mult_sub8x8[THR_LAST] += 2500;
- rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
- rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
- rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
- rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
- rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
-
- // Check for masked out split cases.
- for (i = 0; i < MAX_REFS; i++)
- if (sf->disable_split_mask & (1 << i))
- rd->thresh_mult_sub8x8[i] = INT_MAX;
-
- // disable mode test if frame flag is not set
- if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
- rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
- if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
- rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
- if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
- rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
- if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
- (VP9_LAST_FLAG | VP9_ALT_FLAG))
- rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
- if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
- (VP9_GOLD_FLAG | VP9_ALT_FLAG))
- rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
-}
static void set_speed_features(VP9_COMP *cpi) {
#if CONFIG_INTERNAL_STATS
@@ -500,8 +384,8 @@ static void set_speed_features(VP9_COMP *cpi) {
vp9_set_speed_features(cpi);
// Set rd thresholds based on mode and speed setting
- set_rd_speed_thresholds(cpi);
- set_rd_speed_thresholds_sub8x8(cpi);
+ vp9_set_rd_speed_thresholds(cpi);
+ vp9_set_rd_speed_thresholds_sub8x8(cpi);
cpi->mb.fwd_txm4x4 = vp9_fdct4x4;
if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
@@ -567,7 +451,6 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
vp9_setup_pc_tree(&cpi->common, &cpi->mb);
}
-
static void update_frame_size(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@@ -1860,8 +1743,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
if (cpi->twopass.total_left_stats.coded_error != 0.0)
- fprintf(f, "%10u %10d %10d %10d %10d %10d "
- "%10"PRId64" %10"PRId64" %10d "
+ fprintf(f, "%10u %10d %10d %10d %10d"
+ "%10"PRId64" %10"PRId64" %10"PRId64" %10"PRId64" %10d "
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
"%6d %6d %5d %5d %5d "
"%10"PRId64" %10.3lf"
@@ -1870,6 +1753,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
cpi->rc.projected_frame_size,
cpi->rc.projected_frame_size / cpi->common.MBs,
(cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
+ cpi->rc.vbr_bits_off_target,
cpi->rc.total_target_vs_actual,
(cpi->oxcf.starting_buffer_level - cpi->rc.bits_off_target),
cpi->rc.total_actual_bits, cm->base_qindex,
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index b63d0da5e..b8d0ec40d 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -85,7 +85,7 @@ void vp9_rc_init_minq_luts() {
gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33);
afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55);
- inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.55);
+ inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75);
}
}
@@ -1033,6 +1033,7 @@ void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) {
RATE_CONTROL *const rc = &cpi->rc;
rc->this_frame_target = target;
+
// Target rate per SB64 (including partial SB64s).
rc->sb64_target_rate = ((int64_t)rc->this_frame_target * 64 * 64) /
(cm->width * cm->height);
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index fbeca829f..820366119 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -27,7 +27,9 @@ extern "C" {
typedef struct {
// Rate targeting variables
- int this_frame_target;
+ int base_frame_target; // A baseline frame target before adjustment
+ // for previous under or over shoot.
+ int this_frame_target; // Actual frame target after rc adjustment.
int projected_frame_size;
int sb64_target_rate;
int last_q[3]; // Separate values for Intra/Inter/ARF-GF
@@ -67,6 +69,7 @@ typedef struct {
int64_t buffer_level;
int64_t bits_off_target;
+ int64_t vbr_bits_off_target;
int decimation_factor;
int decimation_count;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 63c099d75..55ae721be 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -4374,3 +4374,120 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
return best_rd;
}
+
+void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
+ int i;
+ RD_OPT *const rd = &cpi->rd;
+
+ // Set baseline threshold values
+ for (i = 0; i < MAX_MODES; ++i)
+ rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
+
+ rd->thresh_mult[THR_NEARESTMV] = 0;
+ rd->thresh_mult[THR_NEARESTG] = 0;
+ rd->thresh_mult[THR_NEARESTA] = 0;
+
+ rd->thresh_mult[THR_DC] += 1000;
+
+ rd->thresh_mult[THR_NEWMV] += 1000;
+ rd->thresh_mult[THR_NEWA] += 1000;
+ rd->thresh_mult[THR_NEWG] += 1000;
+
+ rd->thresh_mult[THR_NEARMV] += 1000;
+ rd->thresh_mult[THR_NEARA] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
+
+ rd->thresh_mult[THR_TM] += 1000;
+
+ rd->thresh_mult[THR_COMP_NEARLA] += 1500;
+ rd->thresh_mult[THR_COMP_NEWLA] += 2000;
+ rd->thresh_mult[THR_NEARG] += 1000;
+ rd->thresh_mult[THR_COMP_NEARGA] += 1500;
+ rd->thresh_mult[THR_COMP_NEWGA] += 2000;
+
+ rd->thresh_mult[THR_ZEROMV] += 2000;
+ rd->thresh_mult[THR_ZEROG] += 2000;
+ rd->thresh_mult[THR_ZEROA] += 2000;
+ rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
+
+ rd->thresh_mult[THR_H_PRED] += 2000;
+ rd->thresh_mult[THR_V_PRED] += 2000;
+ rd->thresh_mult[THR_D45_PRED ] += 2500;
+ rd->thresh_mult[THR_D135_PRED] += 2500;
+ rd->thresh_mult[THR_D117_PRED] += 2500;
+ rd->thresh_mult[THR_D153_PRED] += 2500;
+ rd->thresh_mult[THR_D207_PRED] += 2500;
+ rd->thresh_mult[THR_D63_PRED] += 2500;
+
+ /* disable frame modes if flags not set */
+ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
+ rd->thresh_mult[THR_NEWMV ] = INT_MAX;
+ rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
+ rd->thresh_mult[THR_ZEROMV ] = INT_MAX;
+ rd->thresh_mult[THR_NEARMV ] = INT_MAX;
+ }
+ if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+ rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
+ rd->thresh_mult[THR_ZEROG ] = INT_MAX;
+ rd->thresh_mult[THR_NEARG ] = INT_MAX;
+ rd->thresh_mult[THR_NEWG ] = INT_MAX;
+ }
+ if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
+ rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
+ rd->thresh_mult[THR_ZEROA ] = INT_MAX;
+ rd->thresh_mult[THR_NEARA ] = INT_MAX;
+ rd->thresh_mult[THR_NEWA ] = INT_MAX;
+ }
+
+ if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
+ (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
+ rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
+ }
+ if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
+ (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
+ rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
+ }
+}
+
+void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
+ const SPEED_FEATURES *const sf = &cpi->sf;
+ RD_OPT *const rd = &cpi->rd;
+ int i;
+
+ for (i = 0; i < MAX_REFS; ++i)
+ rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
+
+ rd->thresh_mult_sub8x8[THR_LAST] += 2500;
+ rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
+ rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
+ rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
+ rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
+ rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
+
+ // Check for masked out split cases.
+ for (i = 0; i < MAX_REFS; i++)
+ if (sf->disable_split_mask & (1 << i))
+ rd->thresh_mult_sub8x8[i] = INT_MAX;
+
+ // disable mode test if frame flag is not set
+ if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
+ rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
+ if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
+ rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
+ if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
+ rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
+ if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
+ (VP9_LAST_FLAG | VP9_ALT_FLAG))
+ rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
+ if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
+ (VP9_GOLD_FLAG | VP9_ALT_FLAG))
+ rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
+}
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index a01dbd4d3..e48566499 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -83,6 +83,10 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
ENTROPY_CONTEXT t_above[16],
ENTROPY_CONTEXT t_left[16]);
+void vp9_set_rd_speed_thresholds(VP9_COMP *cpi);
+
+void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index c98c9d415..a5694161c 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -27,8 +27,6 @@
#include "vpx_ports/vpx_timer.h"
#include "vpx_scale/vpx_scale.h"
-#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering
-
static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
uint8_t *y_mb_ptr,
uint8_t *u_mb_ptr,
@@ -122,8 +120,6 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1,
}
}
-#if ALT_REF_MC_ENABLED
-
static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
uint8_t *arf_frame_buf,
uint8_t *frame_ptr_buf,
@@ -133,6 +129,8 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
int step_param;
int sadpb = x->sadperbit16;
int bestsme = INT_MAX;
+ int distortion;
+ unsigned int sse;
MV best_ref_mv1 = {0, 0};
MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
@@ -154,26 +152,19 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
step_param = cpi->sf.reduce_first_step_size + (cpi->oxcf.speed > 5 ? 1 : 0);
step_param = MIN(step_param, cpi->sf.max_step_search_steps - 2);
- /*cpi->sf.search_method == HEX*/
// Ignore mv costing by sending NULL pointer instead of cost arrays
vp9_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1,
&cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1, ref_mv);
- // Try sub-pixel MC?
- // if (bestsme > error_thresh && bestsme < INT_MAX)
- {
- int distortion;
- unsigned int sse;
- // Ignore mv costing by sending NULL pointer instead of cost array
- bestsme = cpi->find_fractional_mv_step(x, ref_mv,
- &best_ref_mv1,
- cpi->common.allow_high_precision_mv,
- x->errorperbit,
- &cpi->fn_ptr[BLOCK_16X16],
- 0, cpi->sf.subpel_iters_per_step,
- NULL, NULL,
- &distortion, &sse);
- }
+ // Ignore mv costing by sending NULL pointer instead of cost array
+ bestsme = cpi->find_fractional_mv_step(x, ref_mv,
+ &best_ref_mv1,
+ cpi->common.allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[BLOCK_16X16],
+ 0, cpi->sf.subpel_iters_per_step,
+ NULL, NULL,
+ &distortion, &sse);
// Restore input state
x->plane[0].src = src;
@@ -181,7 +172,6 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
return bestsme;
}
-#endif
static void temporal_filter_iterate_c(VP9_COMP *cpi,
int frame_count,
@@ -215,8 +205,7 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
input_buffer[i] = mbd->plane[i].pre[0].buf;
for (mb_row = 0; mb_row < mb_rows; mb_row++) {
-#if ALT_REF_MC_ENABLED
- // Source frames are extended to 16 pixels. This is different than
+ // Source frames are extended to 16 pixels. This is different than
// L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
// A 6/8 tap filter is used for motion search. This requires 2 pixels
// before and 3 pixels after. So the largest Y mv on a border would
@@ -230,7 +219,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
+ (17 - 2 * VP9_INTERP_EXTEND);
-#endif
for (mb_col = 0; mb_col < mb_cols; mb_col++) {
int i, j, k;
@@ -239,13 +227,14 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
vpx_memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
vpx_memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
-#if ALT_REF_MC_ENABLED
cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
+ (17 - 2 * VP9_INTERP_EXTEND);
-#endif
for (frame = 0; frame < frame_count; frame++) {
+ const int thresh_low = 10000;
+ const int thresh_high = 20000;
+
if (cpi->frames[frame] == NULL)
continue;
@@ -255,38 +244,31 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
if (frame == alt_ref_index) {
filter_weight = 2;
} else {
- int err = 0;
-#if ALT_REF_MC_ENABLED
-#define THRESH_LOW 10000
-#define THRESH_HIGH 20000
-
// Find best match in this frame by MC
- err = temporal_filter_find_matching_mb_c
- (cpi,
- cpi->frames[alt_ref_index]->y_buffer + mb_y_offset,
- cpi->frames[frame]->y_buffer + mb_y_offset,
- cpi->frames[frame]->y_stride);
-#endif
+ int err = temporal_filter_find_matching_mb_c(cpi,
+ cpi->frames[alt_ref_index]->y_buffer + mb_y_offset,
+ cpi->frames[frame]->y_buffer + mb_y_offset,
+ cpi->frames[frame]->y_stride);
+
// Assign higher weight to matching MB if its error
// score is lower. If not applying MC default behavior
// is to weight all MBs equal.
- filter_weight = err < THRESH_LOW
- ? 2 : err < THRESH_HIGH ? 1 : 0;
+ filter_weight = err < thresh_low
+ ? 2 : err < thresh_high ? 1 : 0;
}
if (filter_weight != 0) {
// Construct the predictors
- temporal_filter_predictors_mb_c
- (mbd,
- cpi->frames[frame]->y_buffer + mb_y_offset,
- cpi->frames[frame]->u_buffer + mb_uv_offset,
- cpi->frames[frame]->v_buffer + mb_uv_offset,
- cpi->frames[frame]->y_stride,
- mb_uv_height,
- mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
- mbd->mi[0]->bmi[0].as_mv[0].as_mv.col,
- predictor, scale,
- mb_col * 16, mb_row * 16);
+ temporal_filter_predictors_mb_c(mbd,
+ cpi->frames[frame]->y_buffer + mb_y_offset,
+ cpi->frames[frame]->u_buffer + mb_uv_offset,
+ cpi->frames[frame]->v_buffer + mb_uv_offset,
+ cpi->frames[frame]->y_stride,
+ mb_uv_height,
+ mbd->mi[0]->bmi[0].as_mv[0].as_mv.row,
+ mbd->mi[0]->bmi[0].as_mv[0].as_mv.col,
+ predictor, scale,
+ mb_col * 16, mb_row * 16);
// Apply the filter (YUV)
vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
@@ -320,7 +302,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
// move to next pixel
byte++;
}
-
byte += stride - 16;
}
@@ -347,14 +328,11 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
// move to next pixel
byte++;
}
-
byte += stride - mb_uv_height;
}
-
mb_y_offset += 16;
mb_uv_offset += mb_uv_height;
}
-
mb_y_offset += 16 * (f->y_stride - mb_cols);
mb_uv_offset += mb_uv_height * (f->uv_stride - mb_cols);
}
@@ -366,18 +344,14 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi,
void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
VP9_COMMON *const cm = &cpi->common;
-
int frame = 0;
-
int frames_to_blur_backward = 0;
int frames_to_blur_forward = 0;
int frames_to_blur = 0;
int start_frame = 0;
-
int strength = cpi->active_arnr_strength;
int blur_type = cpi->oxcf.arnr_type;
int max_frames = cpi->active_arnr_frames;
-
const int num_frames_backward = distance;
const int num_frames_forward = vp9_lookahead_depth(cpi->lookahead)
- (num_frames_backward + 1);
diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c
index 71867a938..a31a476f3 100644
--- a/vp9/encoder/vp9_variance.c
+++ b/vp9/encoder/vp9_variance.c
@@ -18,63 +18,34 @@
#include "vp9/encoder/vp9_variance.h"
-void variance(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- int w,
- int h,
- unsigned int *sse,
- int *sum) {
+void variance(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
int i, j;
- int diff;
*sum = 0;
*sse = 0;
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
- diff = src_ptr[j] - ref_ptr[j];
+ const int diff = a[j] - b[j];
*sum += diff;
*sse += diff * diff;
}
- src_ptr += source_stride;
- ref_ptr += recon_stride;
+ a += a_stride;
+ b += b_stride;
}
}
-/****************************************************************************
- *
- * ROUTINE : filter_block2d_bil_first_pass
- *
- * INPUTS : uint8_t *src_ptr : Pointer to source block.
- * uint32_t src_pixels_per_line : Stride of input block.
- * uint32_t pixel_step : Offset between filter input
- * samples (see notes).
- * uint32_t output_height : Input block height.
- * uint32_t output_width : Input block width.
- * int32_t *vp9_filter : Array of 2 bi-linear filter
- * taps.
- *
- * OUTPUTS : int32_t *output_ptr : Pointer to filtered block.
- *
- * RETURNS : void
- *
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
- * either horizontal or vertical direction to produce the
- * filtered output block. Used to implement first-pass
- * of 2-D separable filter.
- *
- * SPECIAL NOTES : Produces int32_t output to retain precision for next pass.
- * Two filter taps should sum to VP9_FILTER_WEIGHT.
- * pixel_step defines whether the filter is applied
- * horizontally (pixel_step=1) or vertically (pixel_step=
- * stride).
- * It defines the offset required to move from one input
- * to the next.
- *
- ****************************************************************************/
+// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// first-pass of 2-D separable filter.
+//
+// Produces uint16_t output to retain precision for next pass. Two filter taps
+// should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is
+// applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It
+// defines the offset required to move from one input to the next.
static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
uint16_t *output_ptr,
unsigned int src_pixels_per_line,
@@ -99,38 +70,14 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
}
}
-/****************************************************************************
- *
- * ROUTINE : filter_block2d_bil_second_pass
- *
- * INPUTS : int32_t *src_ptr : Pointer to source block.
- * uint32_t src_pixels_per_line : Stride of input block.
- * uint32_t pixel_step : Offset between filter input
- * samples (see notes).
- * uint32_t output_height : Input block height.
- * uint32_t output_width : Input block width.
- * int32_t *vp9_filter : Array of 2 bi-linear filter
- * taps.
- *
- * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block.
- *
- * RETURNS : void
- *
- * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
- * either horizontal or vertical direction to produce the
- * filtered output block. Used to implement second-pass
- * of 2-D separable filter.
- *
- * SPECIAL NOTES : Requires 32-bit input as produced by
- * filter_block2d_bil_first_pass.
- * Two filter taps should sum to VP9_FILTER_WEIGHT.
- * pixel_step defines whether the filter is applied
- * horizontally (pixel_step=1) or vertically (pixel_step=
- * stride).
- * It defines the offset required to move from one input
- * to the next.
- *
- ****************************************************************************/
+// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
+// or vertical direction to produce the filtered output block. Used to implement
+// second-pass of 2-D separable filter.
+//
+// Requires 16-bit input as produced by var_filter_block2d_bil_first_pass. Two
+// filter taps should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the
+// filter is applied horizontally (pixel_step=1) or vertically (pixel_step=
+// stride). It defines the offset required to move from one input to the next.
static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
uint8_t *output_ptr,
unsigned int src_pixels_per_line,
@@ -156,9 +103,8 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
unsigned int i, sum = 0;
- for (i = 0; i < 256; i++) {
- sum += (src_ptr[i] * src_ptr[i]);
- }
+ for (i = 0; i < 256; i++)
+ sum += src_ptr[i] * src_ptr[i];
return sum;
}
@@ -183,12 +129,10 @@ unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
+ uint16_t fdata3[65 * 64];
uint8_t temp2[68 * 64];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 64, hfilter);
@@ -205,13 +149,11 @@ unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
+ uint16_t fdata3[65 * 64];
uint8_t temp2[68 * 64];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 64, hfilter);
@@ -240,12 +182,10 @@ unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
+ uint16_t fdata3[65 * 64];
uint8_t temp2[68 * 64];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 32, hfilter);
@@ -262,13 +202,11 @@ unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
+ uint16_t fdata3[65 * 64];
uint8_t temp2[68 * 64];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 32, hfilter);
@@ -297,12 +235,10 @@ unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
+ uint16_t fdata3[33 * 32];
uint8_t temp2[36 * 32];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 32, hfilter);
@@ -319,13 +255,11 @@ unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
+ uint16_t fdata3[33 * 32];
uint8_t temp2[36 * 32];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 32, hfilter);
@@ -354,12 +288,10 @@ unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
+ uint16_t fdata3[33 * 32];
uint8_t temp2[36 * 32];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 16, hfilter);
@@ -376,13 +308,11 @@ unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
+ uint16_t fdata3[33 * 32];
uint8_t temp2[36 * 32];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 16, hfilter);
@@ -582,17 +512,12 @@ unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
uint8_t temp2[20 * 16];
- const int16_t *hfilter, *vfilter;
- uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ uint16_t fdata3[5 * 4];
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
- // First filter 1d Horizontal
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 5, 4, hfilter);
-
- // Now filter Verticaly
var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
return vp9_variance4x4(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
@@ -607,18 +532,13 @@ unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr,
unsigned int *sse,
const uint8_t *second_pred) {
uint8_t temp2[20 * 16];
- const int16_t *hfilter, *vfilter;
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer
- uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4);
+ uint16_t fdata3[5 * 4];
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
- // First filter 1d Horizontal
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 5, 4, hfilter);
-
- // Now filter Verticaly
var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter);
vp9_comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4);
return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse);
@@ -631,17 +551,14 @@ unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering
+ uint16_t fdata3[9 * 8];
uint8_t temp2[20 * 16];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 8, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
-
return vp9_variance8x8(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}
@@ -653,18 +570,17 @@ unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering
+ uint16_t fdata3[9 * 8];
uint8_t temp2[20 * 16];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 8, hfilter);
var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter);
vp9_comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8);
+
return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse);
}
@@ -675,12 +591,10 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[17 * 16]; // Temp data buffer used in filtering
+ uint16_t fdata3[17 * 16];
uint8_t temp2[20 * 16];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 16, hfilter);
@@ -699,11 +613,9 @@ unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr,
const uint8_t *second_pred) {
uint16_t fdata3[17 * 16];
uint8_t temp2[20 * 16];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 16, hfilter);
@@ -722,10 +634,8 @@ unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
unsigned int *sse) {
uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
uint8_t temp2[68 * 64];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 64, hfilter);
@@ -742,13 +652,11 @@ unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering
+ uint16_t fdata3[65 * 64];
uint8_t temp2[68 * 64];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 65, 64, hfilter);
@@ -764,12 +672,10 @@ unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
+ uint16_t fdata3[33 * 32];
uint8_t temp2[36 * 32];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 32, hfilter);
@@ -786,13 +692,11 @@ unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering
+ uint16_t fdata3[33 * 32];
uint8_t temp2[36 * 32];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 33, 32, hfilter);
@@ -928,12 +832,10 @@ unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering
+ uint16_t fdata3[16 * 9];
uint8_t temp2[20 * 16];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 16, hfilter);
@@ -950,13 +852,11 @@ unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering
+ uint16_t fdata3[16 * 9];
uint8_t temp2[20 * 16];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 16, hfilter);
@@ -974,10 +874,8 @@ unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
unsigned int *sse) {
uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering
uint8_t temp2[20 * 16];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 8, hfilter);
@@ -994,13 +892,11 @@ unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering
+ uint16_t fdata3[9 * 16];
uint8_t temp2[20 * 16];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 17, 8, hfilter);
@@ -1016,12 +912,10 @@ unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
+ uint16_t fdata3[8 * 5];
uint8_t temp2[20 * 16];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 5, 8, hfilter);
@@ -1038,13 +932,11 @@ unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering
+ uint16_t fdata3[8 * 5];
uint8_t temp2[20 * 16];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 5, 8, hfilter);
@@ -1060,14 +952,12 @@ unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
const uint8_t *dst_ptr,
int dst_pixels_per_line,
unsigned int *sse) {
- uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
+ uint16_t fdata3[5 * 8];
// FIXME(jingning,rbultje): this temp2 buffer probably doesn't need to be
// of this big? same issue appears in all other block size settings.
uint8_t temp2[20 * 16];
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 4, hfilter);
@@ -1084,13 +974,11 @@ unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr,
int dst_pixels_per_line,
unsigned int *sse,
const uint8_t *second_pred) {
- uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering
+ uint16_t fdata3[5 * 8];
uint8_t temp2[20 * 16];
- DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer
- const int16_t *hfilter, *vfilter;
-
- hfilter = BILINEAR_FILTERS_2TAP(xoffset);
- vfilter = BILINEAR_FILTERS_2TAP(yoffset);
+ DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8);
+ const int16_t *const hfilter = BILINEAR_FILTERS_2TAP(xoffset);
+ const int16_t *const vfilter = BILINEAR_FILTERS_2TAP(yoffset);
var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line,
1, 9, 4, hfilter);
@@ -1106,9 +994,8 @@ void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
- int tmp;
- tmp = pred[j] + ref[j];
- comp_pred[j] = (tmp + 1) >> 1;
+ const int tmp = pred[j] + ref[j];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
}
comp_pred += width;
pred += width;
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index c9e39a1a2..4c8be71cd 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -17,14 +17,10 @@
extern "C" {
#endif
-void variance(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int recon_stride,
- int w,
- int h,
- unsigned int *sse,
- int *sum);
+void variance(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int w, int h,
+ unsigned int *sse, int *sum);
typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr,
int source_stride,