Diffstat (limited to 'vp9')
-rw-r--r--  vp9/encoder/vp9_firstpass.c       47
-rw-r--r--  vp9/encoder/vp9_mcomp.c           40
-rw-r--r--  vp9/encoder/vp9_pickmode.c       403
-rw-r--r--  vp9/encoder/x86/vp9_dct_ssse3.c   83
-rw-r--r--  vp9/vp9_cx_iface.c                 4
5 files changed, 302 insertions(+), 275 deletions(-)
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 046c64170..3f28e0ff3 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -60,6 +60,14 @@
#define RC_FACTOR_MIN 0.75
#define RC_FACTOR_MAX 1.75
+
+#define INTRA_WEIGHT_EXPERIMENT 0
+#if INTRA_WEIGHT_EXPERIMENT
+#define NCOUNT_INTRA_THRESH 8192
+#define NCOUNT_INTRA_FACTOR 3
+#define NCOUNT_FRAME_II_THRESH 5.0
+#endif
+
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)
#if ARF_STATS_OUTPUT
@@ -470,7 +478,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
int intercount = 0;
int second_ref_count = 0;
const int intrapenalty = INTRA_MODE_PENALTY;
- int neutral_count = 0;
+ double neutral_count;
int new_mv_count = 0;
int sum_in_vectors = 0;
MV lastmv = {0, 0};
@@ -503,6 +511,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
intra_factor = 0.0;
brightness_factor = 0.0;
+ neutral_count = 0.0;
set_first_pass_params(cpi);
vp9_set_quantizer(cm, find_fp_qindex(cm->bit_depth));
@@ -818,12 +827,28 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
#endif
if (motion_error <= this_error) {
+ vp9_clear_system_state();
+
// Keep a count of cases where the inter and intra were very close
// and very low. This helps with scene cut detection for example in
// cropped clips with black bars at the sides or top and bottom.
+#if INTRA_WEIGHT_EXPERIMENT
+ if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
+ (this_error < (2 * intrapenalty))) {
+ neutral_count += 1.0;
+ // Also track cases where the intra is not much worse than the inter
+ // and use this in limiting the GF/arf group length.
+ } else if ((this_error > NCOUNT_INTRA_THRESH) &&
+ (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) {
+ neutral_count += (double)motion_error /
+ DOUBLE_DIVIDE_CHECK((double)this_error);
+ }
+#else
if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
- this_error < 2 * intrapenalty)
- ++neutral_count;
+ (this_error < (2 * intrapenalty))) {
+ neutral_count += 1.0;
+ }
+#endif
mv.row *= 8;
mv.col *= 8;
@@ -1260,17 +1285,27 @@ static double get_sr_decay_rate(const VP9_COMP *cpi,
double sr_diff =
(frame->sr_coded_error - frame->coded_error) / num_mbs;
double sr_decay = 1.0;
+ double modified_pct_inter;
+ double modified_pcnt_intra;
const double motion_amplitude_factor =
frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2);
- const double pcnt_intra = 100 * (1.0 - frame->pcnt_inter);
+
+ modified_pct_inter = frame->pcnt_inter;
+#if INTRA_WEIGHT_EXPERIMENT
+ if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
+ (double)NCOUNT_FRAME_II_THRESH)
+ modified_pct_inter = frame->pcnt_inter - frame->pcnt_neutral;
+#endif
+ modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);
+
if ((sr_diff > LOW_SR_DIFF_TRHESH)) {
sr_diff = MIN(sr_diff, SR_DIFF_MAX);
sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) -
(MOTION_AMP_PART * motion_amplitude_factor) -
- (INTRA_PART * pcnt_intra);
+ (INTRA_PART * modified_pcnt_intra);
}
- return MAX(sr_decay, MIN(DEFAULT_DECAY_LIMIT, frame->pcnt_inter));
+ return MAX(sr_decay, MIN(DEFAULT_DECAY_LIMIT, modified_pct_inter));
}
// This function gives an estimate of how badly we believe the prediction
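
The INTRA_WEIGHT_EXPERIMENT path above turns neutral_count from an integer tally into a fractional one: blocks where inter and intra are both very close and very cheap still add 1.0, while blocks whose intra error is large (above NCOUNT_INTRA_THRESH) yet within NCOUNT_INTRA_FACTOR of the motion error add motion_error / this_error, a weight in (1/3, 1] since motion_error <= this_error on this path. get_sr_decay_rate() then subtracts the frame's pcnt_neutral from pcnt_inter whenever the frame-level intra/coded error ratio is below NCOUNT_FRAME_II_THRESH, which limits the GF/ARF group length for such content. A minimal standalone sketch of the per-block weighting (the constants mirror the diff; accumulate_neutral() itself is hypothetical):

#include <stdio.h>

#define NCOUNT_INTRA_THRESH 8192
#define NCOUNT_INTRA_FACTOR 3
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001)

/* Only reached when motion_error <= this_error, as in the diff. */
static double accumulate_neutral(double neutral_count, int this_error,
                                 int motion_error, int intrapenalty) {
  if ((this_error - intrapenalty) * 9 <= motion_error * 10 &&
      this_error < 2 * intrapenalty) {
    neutral_count += 1.0;  /* inter and intra both very close and very low */
  } else if (this_error > NCOUNT_INTRA_THRESH &&
             this_error < NCOUNT_INTRA_FACTOR * motion_error) {
    /* Intra not much worse than inter: fractional weight in (1/3, 1]. */
    neutral_count += (double)motion_error /
                     DOUBLE_DIVIDE_CHECK((double)this_error);
  }
  return neutral_count;
}

int main(void) {
  /* 12000 > NCOUNT_INTRA_THRESH and 12000 < 3 * 9000, so this block adds
   * 9000 / 12000 = 0.75 rather than a full count. */
  printf("%f\n", accumulate_neutral(0.0, 12000, 9000, 0));
  return 0;
}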
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 12882e432..eb01bb279 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1783,9 +1783,8 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
return (center - (bw >> 1));
}
-static const MV search_pos[9] = {
- {-1, -1}, {-1, 0}, {-1, 1}, {0, -1}, {0, 0}, {0, 1},
- {1, -1}, {1, 0}, {1, 1},
+static const MV search_pos[5] = {
+ {-1, 0}, {0, -1}, {0, 0}, {0, 1}, {1, 0},
};
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
@@ -1804,7 +1803,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
const int ref_stride = xd->plane[0].pre[0].stride;
uint8_t const *ref_buf, *src_buf;
MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv;
- int best_sad;
+ int best_sad, tmp_sad, this_sad[5];
MV this_mv;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -1845,21 +1844,40 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
best_sad = INT_MAX;
this_mv = *tmp_mv;
- for (idx = 0; idx < 9; ++idx) {
- int this_sad;
- src_buf = x->plane[0].src.buf;
+ src_buf = x->plane[0].src.buf;
+ for (idx = 0; idx < 5; ++idx) {
ref_buf = xd->plane[0].pre[0].buf +
(search_pos[idx].row + this_mv.row) * ref_stride +
(search_pos[idx].col + this_mv.col);
- this_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride,
- ref_buf, ref_stride);
- if (this_sad < best_sad) {
- best_sad = this_sad;
+ this_sad[idx] = cpi->fn_ptr[bsize].sdf(src_buf, src_stride,
+ ref_buf, ref_stride);
+ if (this_sad[idx] < best_sad) {
+ best_sad = this_sad[idx];
tmp_mv->row = search_pos[idx].row + this_mv.row;
tmp_mv->col = search_pos[idx].col + this_mv.col;
}
}
+
+ if (this_sad[0] < this_sad[4])
+ this_mv.row -= 1;
+ else
+ this_mv.row += 1;
+
+ if (this_sad[1] < this_sad[3])
+ this_mv.col -= 1;
+ else
+ this_mv.col += 1;
+
+ ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
+
+ tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride,
+ ref_buf, ref_stride);
+ if (best_sad > tmp_sad) {
+ *tmp_mv = this_mv;
+ best_sad = tmp_sad;
+ }
+
tmp_mv->row *= 8;
tmp_mv->col *= 8;
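
The rewrite above shrinks the full-pel refinement in vp9_int_pro_motion_estimation() from a 9-point square to a 5-point cross, then compares the vertical pair (this_sad[0] vs this_sad[4]) and the horizontal pair (this_sad[1] vs this_sad[3]) to probe the single most promising diagonal, so at most 6 SADs are computed instead of 9. A self-contained sketch of the pattern with simplified types (sad_at() is a hypothetical stand-in for cpi->fn_ptr[bsize].sdf()):

#include <limits.h>
#include <stdio.h>

typedef struct { int row, col; } Mv;

static const Mv kCross[5] = { {-1, 0}, {0, -1}, {0, 0}, {0, 1}, {1, 0} };

static Mv cross_diag_search(Mv center, int (*sad_at)(Mv)) {
  Mv best = center, diag = center;
  int sad[5], best_sad = INT_MAX, i;
  for (i = 0; i < 5; ++i) {
    Mv p = { center.row + kCross[i].row, center.col + kCross[i].col };
    sad[i] = sad_at(p);
    if (sad[i] < best_sad) { best_sad = sad[i]; best = p; }
  }
  /* Indices 0/4 are the row -1/+1 neighbors, 1/3 the col -1/+1 neighbors;
   * step diagonally toward whichever side of each axis scored lower. */
  diag.row += (sad[0] < sad[4]) ? -1 : 1;
  diag.col += (sad[1] < sad[3]) ? -1 : 1;
  if (sad_at(diag) < best_sad) best = diag;
  return best;
}

/* Toy SAD surface with its minimum at the origin. */
static int toy_sad(Mv m) { return m.row * m.row + m.col * m.col; }

int main(void) {
  Mv start = { 2, -3 };
  Mv best = cross_diag_search(start, toy_sad);
  printf("best: (%d, %d)\n", best.row, best.col);  /* steps to (1, -2) */
  return 0;
}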
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index c820651fb..88003ec17 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -217,6 +217,8 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
const uint32_t ac_quant = pd->dequant[1];
unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, &sse);
+ int skip_dc = 0;
+
*var_y = var;
*sse_y = sse;
@@ -258,6 +260,9 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
// Check if dc coefficient can be quantized to zero.
if (sse_tx - var_tx < dc_thr || sse == var)
x->skip_txfm[0] = 1;
+ } else {
+ if (sse_tx - var_tx < dc_thr || sse == var)
+ skip_dc = 1;
}
}
@@ -267,21 +272,28 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
return;
}
+ if (!skip_dc) {
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
- dc_quant >> (xd->bd - 5), &rate, &dist);
- } else {
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
+ dc_quant >> (xd->bd - 5), &rate, &dist);
+ } else {
+ vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
+ dc_quant >> 3, &rate, &dist);
+ }
+#else
vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
dc_quant >> 3, &rate, &dist);
- }
-#else
- vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize],
- dc_quant >> 3, &rate, &dist);
#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
- *out_rate_sum = rate >> 1;
- *out_dist_sum = dist << 3;
+ if (!skip_dc) {
+ *out_rate_sum = rate >> 1;
+ *out_dist_sum = dist << 3;
+ } else {
+ *out_rate_sum = 0;
+ *out_dist_sum = (sse - var) << 4;
+ }
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -590,13 +602,27 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
*rd_cost = best_rdc;
}
-static const PREDICTION_MODE inter_mode_set[INTER_MODES] = {
- ZEROMV, NEARESTMV, NEARMV, NEWMV,
-};
-
static const int ref_frame_cost[MAX_REF_FRAMES] = {
1235, 229, 530, 615,
};
+
+typedef struct {
+ MV_REFERENCE_FRAME ref_frame;
+ PREDICTION_MODE pred_mode;
+} REF_MODE;
+
+#define RT_INTER_MODES 8
+static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
+ {LAST_FRAME, ZEROMV},
+ {LAST_FRAME, NEARESTMV},
+ {LAST_FRAME, NEARMV},
+ {LAST_FRAME, NEWMV},
+ {GOLDEN_FRAME, ZEROMV},
+ {GOLDEN_FRAME, NEARESTMV},
+ {GOLDEN_FRAME, NEARMV},
+ {GOLDEN_FRAME, NEWMV}
+};
+
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
// this needs various further optimizations. to be continued..
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -653,6 +679,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const int pixels_in_block = bh * bw;
int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
int ref_frame_skip_mask = 0;
+ int idx;
if (reuse_inter_pred) {
int i;
@@ -736,9 +763,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->rc.frames_since_golden == 0)
ref_frame_skip_mask |= (1 << GOLDEN_FRAME);
- for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
- PREDICTION_MODE this_mode;
- int i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
+ for (idx = 0; idx < RT_INTER_MODES; ++idx) {
+ int rate_mv = 0;
+ int mode_rd_thresh;
+ int mode_index;
+ int i;
+ PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
+
+ ref_frame = ref_mode_set[idx].ref_frame;
+ mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
+
+ i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
if (!(cpi->ref_frame_flags & flag_list[ref_frame]))
continue;
@@ -760,212 +795,194 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->ref_frame[0] = ref_frame;
set_ref_ptrs(cm, xd, ref_frame, NONE);
- for (i = 0; i < INTER_MODES; ++i) {
- int rate_mv = 0;
- int mode_rd_thresh;
- int mode_index;
- this_mode = inter_mode_set[i];
- mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
+ if (const_motion[ref_frame] && this_mode == NEARMV)
+ continue;
- if (const_motion[ref_frame] && this_mode == NEARMV)
- continue;
+ if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
+ continue;
- if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
- continue;
+ mode_rd_thresh = best_mode_skip_txfm ?
+ rd_threshes[mode_index] << 1 : rd_threshes[mode_index];
+ if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
+ rd_thresh_freq_fact[mode_index]))
+ continue;
- mode_rd_thresh = best_mode_skip_txfm ? rd_threshes[mode_index] << 1 :
- rd_threshes[mode_index];
- if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
- rd_thresh_freq_fact[mode_index]))
+ if (this_mode == NEWMV) {
+ if (cpi->sf.partition_search_type != VAR_BASED_PARTITION
+ && best_rdc.rdcost < (int64_t) (1 << num_pels_log2_lookup[bsize]))
continue;
+ if (ref_frame > LAST_FRAME) {
+ int tmp_sad;
+ int dis, cost_list[5];
- if (this_mode == NEWMV) {
- if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
- best_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
+ if (bsize < BLOCK_16X16)
continue;
- if (ref_frame > LAST_FRAME) {
- int tmp_sad;
- int dis, cost_list[5];
-
- if (bsize < BLOCK_16X16)
- continue;
-
- tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
- if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
- continue;
-
- frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
- rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
- &mbmi->ref_mvs[ref_frame][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
- frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
- frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
-
- cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
- &mbmi->ref_mvs[ref_frame][0].as_mv,
- cpi->common.allow_high_precision_mv,
- x->errorperbit,
- &cpi->fn_ptr[bsize],
- cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step,
- cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis,
- &x->pred_sse[ref_frame], NULL, 0, 0);
- } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame],
- &rate_mv, best_rdc.rdcost)) {
+ tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
+ if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
continue;
- }
- }
- if (this_mode != NEARESTMV &&
- frame_mv[this_mode][ref_frame].as_int ==
- frame_mv[NEARESTMV][ref_frame].as_int)
+ frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
+ rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
+ &mbmi->ref_mvs[ref_frame][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
+ frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
+
+ cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
+ &mbmi->ref_mvs[ref_frame][0].as_mv,
+ cpi->common.allow_high_precision_mv,
+ x->errorperbit,
+ &cpi->fn_ptr[bsize],
+ cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_iters_per_step,
+ cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost, &dis,
+ &x->pred_sse[ref_frame], NULL, 0, 0);
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost)) {
continue;
+ }
+ }
- mbmi->mode = this_mode;
- mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+ if (this_mode != NEARESTMV && frame_mv[this_mode][ref_frame].as_int ==
+ frame_mv[NEARESTMV][ref_frame].as_int)
+ continue;
- // Search for the best prediction filter type, when the resulting
- // motion vector is at sub-pixel accuracy level for luma component, i.e.,
- // the last three bits are all zeros.
- if (reuse_inter_pred) {
- if (!this_mode_pred) {
- this_mode_pred = &tmp[3];
- } else {
- this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
- pd->dst.buf = this_mode_pred->data;
- pd->dst.stride = bw;
- }
+ mbmi->mode = this_mode;
+ mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
+
+ // Search for the best prediction filter type, when the resulting
+ // motion vector is at sub-pixel accuracy level for luma component, i.e.,
+ // the last three bits are all zeros.
+ if (reuse_inter_pred) {
+ if (!this_mode_pred) {
+ this_mode_pred = &tmp[3];
+ } else {
+ this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
+ pd->dst.buf = this_mode_pred->data;
+ pd->dst.stride = bw;
}
+ }
- if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
- pred_filter_search && (ref_frame == LAST_FRAME) &&
- ((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
- (mbmi->mv[0].as_mv.col & 0x07) != 0)) {
- int pf_rate[3];
- int64_t pf_dist[3];
- unsigned int pf_var[3];
- unsigned int pf_sse[3];
- TX_SIZE pf_tx_size[3];
- int64_t best_cost = INT64_MAX;
- INTERP_FILTER best_filter = SWITCHABLE, filter;
- PRED_BUFFER *current_pred = this_mode_pred;
-
- for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) {
- int64_t cost;
- mbmi->interp_filter = filter;
- vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
- model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter],
- &pf_dist[filter], &pf_var[filter], &pf_sse[filter]);
- pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
- cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
- pf_tx_size[filter] = mbmi->tx_size;
- if (cost < best_cost) {
- best_filter = filter;
- best_cost = cost;
- skip_txfm = x->skip_txfm[0];
-
- if (reuse_inter_pred) {
- if (this_mode_pred != current_pred) {
- free_pred_buffer(this_mode_pred);
- this_mode_pred = current_pred;
- }
-
- if (filter < EIGHTTAP_SHARP) {
- current_pred = &tmp[get_pred_buffer(tmp, 3)];
- pd->dst.buf = current_pred->data;
- pd->dst.stride = bw;
- }
+ if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search
+ && (ref_frame == LAST_FRAME)
+ && (((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 0x07) != 0)) {
+ int pf_rate[3];
+ int64_t pf_dist[3];
+ unsigned int pf_var[3];
+ unsigned int pf_sse[3];
+ TX_SIZE pf_tx_size[3];
+ int64_t best_cost = INT64_MAX;
+ INTERP_FILTER best_filter = SWITCHABLE, filter;
+ PRED_BUFFER *current_pred = this_mode_pred;
+
+ for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) {
+ int64_t cost;
+ mbmi->interp_filter = filter;
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter],
+ &pf_var[filter], &pf_sse[filter]);
+ pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
+ cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
+ pf_tx_size[filter] = mbmi->tx_size;
+ if (cost < best_cost) {
+ best_filter = filter;
+ best_cost = cost;
+ skip_txfm = x->skip_txfm[0];
+
+ if (reuse_inter_pred) {
+ if (this_mode_pred != current_pred) {
+ free_pred_buffer(this_mode_pred);
+ this_mode_pred = current_pred;
+ }
+
+ if (filter < EIGHTTAP_SHARP) {
+ current_pred = &tmp[get_pred_buffer(tmp, 3)];
+ pd->dst.buf = current_pred->data;
+ pd->dst.stride = bw;
}
}
}
-
- if (reuse_inter_pred && this_mode_pred != current_pred)
- free_pred_buffer(current_pred);
-
- mbmi->interp_filter = best_filter;
- mbmi->tx_size = pf_tx_size[mbmi->interp_filter];
- this_rdc.rate = pf_rate[mbmi->interp_filter];
- this_rdc.dist = pf_dist[mbmi->interp_filter];
- var_y = pf_var[mbmi->interp_filter];
- sse_y = pf_sse[mbmi->interp_filter];
- x->skip_txfm[0] = skip_txfm;
- } else {
- mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref;
- vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
- model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
- &var_y, &sse_y);
- this_rdc.rate += cm->interp_filter == SWITCHABLE ?
- vp9_get_switchable_rate(cpi, xd) : 0;
}
- // chroma component rate-distortion cost modeling
- if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
- int uv_rate = 0;
- int64_t uv_dist = 0;
- if (x->color_sensitivity[0])
- vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
- if (x->color_sensitivity[1])
- vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
- model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist,
- &var_y, &sse_y);
- this_rdc.rate += uv_rate;
- this_rdc.dist += uv_dist;
- }
+ if (reuse_inter_pred && this_mode_pred != current_pred)
+ free_pred_buffer(current_pred);
- this_rdc.rate += rate_mv;
- this_rdc.rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
- [INTER_OFFSET(this_mode)];
- this_rdc.rate += ref_frame_cost[ref_frame];
- this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
- this_rdc.rate, this_rdc.dist);
+ mbmi->interp_filter = best_filter;
+ mbmi->tx_size = pf_tx_size[mbmi->interp_filter];
+ this_rdc.rate = pf_rate[mbmi->interp_filter];
+ this_rdc.dist = pf_dist[mbmi->interp_filter];
+ var_y = pf_var[mbmi->interp_filter];
+ sse_y = pf_sse[mbmi->interp_filter];
+ x->skip_txfm[0] = skip_txfm;
+ } else {
+ mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref;
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
+ &var_y, &sse_y);
+ this_rdc.rate +=
+ cm->interp_filter == SWITCHABLE ?
+ vp9_get_switchable_rate(cpi, xd) : 0;
+ }
- // Skipping checking: test to see if this block can be reconstructed by
- // prediction only.
- if (cpi->allow_encode_breakout) {
- encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame,
- this_mode, var_y, sse_y, yv12_mb,
- &this_rdc.rate, &this_rdc.dist);
- if (x->skip) {
- this_rdc.rate += rate_mv;
- this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
- this_rdc.rate, this_rdc.dist);
- }
+ // chroma component rate-distortion cost modeling
+ if (x->color_sensitivity[0] || x->color_sensitivity[1]) {
+ int uv_rate = 0;
+ int64_t uv_dist = 0;
+ if (x->color_sensitivity[0])
+ vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1);
+ if (x->color_sensitivity[1])
+ vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2);
+ model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist, &var_y, &sse_y);
+ this_rdc.rate += uv_rate;
+ this_rdc.dist += uv_dist;
+ }
+
+ this_rdc.rate += rate_mv;
+ this_rdc.rate +=
+ cpi->inter_mode_cost[mbmi->mode_context[ref_frame]][INTER_OFFSET(
+ this_mode)];
+ this_rdc.rate += ref_frame_cost[ref_frame];
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
+
+ // Skipping checking: test to see if this block can be reconstructed by
+ // prediction only.
+ if (cpi->allow_encode_breakout) {
+ encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, this_mode,
+ var_y, sse_y, yv12_mb, &this_rdc.rate,
+ &this_rdc.dist);
+ if (x->skip) {
+ this_rdc.rate += rate_mv;
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate,
+ this_rdc.dist);
}
+ }
#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0)
- vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx);
+ if (cpi->oxcf.noise_sensitivity > 0)
+ vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx);
#else
- (void)ctx;
+ (void)ctx;
#endif
- if (this_rdc.rdcost < best_rdc.rdcost || x->skip) {
- best_rdc = this_rdc;
- best_mode = this_mode;
- best_pred_filter = mbmi->interp_filter;
- best_tx_size = mbmi->tx_size;
- best_ref_frame = ref_frame;
- best_mode_skip_txfm = x->skip_txfm[0];
-
- if (reuse_inter_pred) {
- free_pred_buffer(best_pred);
- best_pred = this_mode_pred;
- }
- } else {
- if (reuse_inter_pred)
- free_pred_buffer(this_mode_pred);
- }
+ if (this_rdc.rdcost < best_rdc.rdcost || x->skip) {
+ best_rdc = this_rdc;
+ best_mode = this_mode;
+ best_pred_filter = mbmi->interp_filter;
+ best_tx_size = mbmi->tx_size;
+ best_ref_frame = ref_frame;
+ best_mode_skip_txfm = x->skip_txfm[0];
- if (x->skip)
- break;
+ if (reuse_inter_pred) {
+ free_pred_buffer(best_pred);
+ best_pred = this_mode_pred;
+ }
+ } else {
+ if (reuse_inter_pred)
+ free_pred_buffer(this_mode_pred);
}
- // Check that a prediction mode has been selected.
- assert(best_rdc.rdcost < INT64_MAX);
-
if (x->skip)
break;
}
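
Two substantive changes hide in the re-indentation above. First, model_rd_for_sb_y() gains a skip_dc path: when the AC coefficients cannot be skipped but the DC coefficient would quantize to zero, the modeled DC rate is dropped (*out_rate_sum = 0) and the DC distortion is approximated as (sse - var) << 4 instead of being modeled. Second, the nested loops (reference frame outer, inter_mode_set inner) collapse into one pass over the new ref_mode_set table, moving every skip test, the filter search, and the RD bookkeeping out one level; the assert that a mode was selected is dropped along the way. A simplified sketch of the flattened iteration (the enums are stand-ins for the libvpx types):

#include <stdio.h>

typedef enum { LAST_FRAME, GOLDEN_FRAME } RefFrame;
typedef enum { ZEROMV, NEARESTMV, NEARMV, NEWMV } PredMode;
typedef struct { RefFrame ref; PredMode mode; } RefMode;

#define RT_INTER_MODES 8
static const RefMode kRefModeSet[RT_INTER_MODES] = {
  { LAST_FRAME, ZEROMV },   { LAST_FRAME, NEARESTMV },
  { LAST_FRAME, NEARMV },   { LAST_FRAME, NEWMV },
  { GOLDEN_FRAME, ZEROMV }, { GOLDEN_FRAME, NEARESTMV },
  { GOLDEN_FRAME, NEARMV }, { GOLDEN_FRAME, NEWMV },
};

int main(void) {
  int idx;
  for (idx = 0; idx < RT_INTER_MODES; ++idx) {
    /* All per-pair skip tests (ref availability, rd thresholds, the NEWMV
     * motion search) now live at this single nesting level. */
    printf("ref=%d mode=%d\n", kRefModeSet[idx].ref, kRefModeSet[idx].mode);
  }
  return 0;
}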
diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c
index 5c0ad7892..a1a2bda80 100644
--- a/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -102,99 +102,56 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
const __m128i r2 = _mm_sub_epi16(q1, q2);
const __m128i r3 = _mm_sub_epi16(q0, q3);
// Interleave to do the multiply by constants which gets us into 32bits
- const __m128i t0 = _mm_add_epi16(r0, r1);
- const __m128i t1 = _mm_sub_epi16(r0, r1);
+ const __m128i t0 = _mm_unpacklo_epi16(r0, r1);
+ const __m128i t1 = _mm_unpackhi_epi16(r0, r1);
const __m128i t2 = _mm_unpacklo_epi16(r2, r3);
const __m128i t3 = _mm_unpackhi_epi16(r2, r3);
- const __m128i u0 = _mm_mulhrs_epi16(t0, k__dual_p16_p16);
- const __m128i u1 = _mm_mulhrs_epi16(t1, k__dual_p16_p16);
+ const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16);
+ const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16);
+ const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16);
+ const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16);
+
const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08);
const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08);
const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24);
const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24);
// dct_const_round_shift
- const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
- const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
- const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
- const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
- const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
- const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
- const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
- const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
- // Combine
- res0 = u0;
- res4 = u1;
- res2 = _mm_packs_epi32(w4, w5);
- res6 = _mm_packs_epi32(w6, w7);
- }
- // Work on next four results
- if (pass == 1) {
- // Interleave to do the multiply by constants which gets us into 32bits
- const __m128i d0 = _mm_unpacklo_epi16(q6, q5);
- const __m128i d1 = _mm_unpackhi_epi16(q6, q5);
- const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16);
- const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16);
- const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16);
- const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16);
- // dct_const_round_shift
- const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING);
- const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING);
- const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING);
- const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING);
- const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS);
- const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS);
- const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS);
- const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS);
- // Combine
- const __m128i r0 = _mm_packs_epi32(s0, s1);
- const __m128i r1 = _mm_packs_epi32(s2, s3);
- // Add/subtract
- const __m128i x0 = _mm_add_epi16(q4, r0);
- const __m128i x1 = _mm_sub_epi16(q4, r0);
- const __m128i x2 = _mm_sub_epi16(q7, r1);
- const __m128i x3 = _mm_add_epi16(q7, r1);
- // Interleave to do the multiply by constants which gets us into 32bits
- const __m128i t0 = _mm_unpacklo_epi16(x0, x3);
- const __m128i t1 = _mm_unpackhi_epi16(x0, x3);
- const __m128i t2 = _mm_unpacklo_epi16(x1, x2);
- const __m128i t3 = _mm_unpackhi_epi16(x1, x2);
- const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04);
- const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04);
- const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28);
- const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28);
- const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20);
- const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20);
- const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12);
- const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12);
- // dct_const_round_shift
+
const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
+
const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING);
const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING);
const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING);
const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING);
+
const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
+
const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS);
const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS);
const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS);
const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS);
// Combine
- res1 = _mm_packs_epi32(w0, w1);
- res7 = _mm_packs_epi32(w2, w3);
- res5 = _mm_packs_epi32(w4, w5);
- res3 = _mm_packs_epi32(w6, w7);
- } else {
+
+ res0 = _mm_packs_epi32(w0, w1);
+ res4 = _mm_packs_epi32(w2, w3);
+ res2 = _mm_packs_epi32(w4, w5);
+ res6 = _mm_packs_epi32(w6, w7);
+ }
+ // Work on next four results
+ {
// Interleave to do the multiply by constants which gets us into 32bits
const __m128i d0 = _mm_sub_epi16(q6, q5);
const __m128i d1 = _mm_add_epi16(q6, q5);
const __m128i r0 = _mm_mulhrs_epi16(d0, k__dual_p16_p16);
const __m128i r1 = _mm_mulhrs_epi16(d1, k__dual_p16_p16);
+
// Add/subtract
const __m128i x0 = _mm_add_epi16(q4, r0);
const __m128i x1 = _mm_sub_epi16(q4, r0);
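
The dct_ssse3 hunk switches the even outputs (res0/res4) from the approximate _mm_mulhrs_epi16 butterfly to the exact multiply-accumulate form: interleaving r0/r1 and feeding _mm_madd_epi16 the paired constants k__cospi_p16_p16 and k__cospi_p16_m16 computes (r0 + r1) * cospi_16_64 and (r0 - r1) * cospi_16_64 per lane, followed by the usual dct_const_round_shift, while the q6/q5 recombination now uses the cheaper mulhrs form on both passes. A scalar sketch of one lane (cospi_16_64 and dct_const_round_shift follow the stock VP9 C reference; the input values are illustrative):

#include <stdint.h>
#include <stdio.h>

#define DCT_CONST_BITS 14
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))
static const int32_t cospi_16_64 = 11585;  /* round(cos(pi/4) * 2^14) */

static int16_t dct_const_round_shift(int32_t input) {
  return (int16_t)((input + DCT_CONST_ROUNDING) >> DCT_CONST_BITS);
}

int main(void) {
  int16_t r0 = 100, r1 = 60;
  /* One lane of the even-output butterfly: madd with k__cospi_p16_p16
   * yields (r0 + r1) * cospi_16_64, madd with k__cospi_p16_m16 yields
   * (r0 - r1) * cospi_16_64; both then take dct_const_round_shift. */
  int16_t res0 = dct_const_round_shift((r0 + r1) * cospi_16_64);
  int16_t res4 = dct_const_round_shift((r0 - r1) * cospi_16_64);
  printf("res0=%d res4=%d\n", res0, res4);
  return 0;
}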
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index d1dd66b2c..fb6aac721 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -694,7 +694,7 @@ static vpx_codec_err_t ctrl_set_rc_gf_cbr_boost_pct(
vpx_codec_alg_priv_t *ctx, va_list args) {
struct vp9_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.gf_cbr_boost_pct =
- CAST(VP8E_SET_GF_CBR_BOOST_PCT, args);
+ CAST(VP9E_SET_GF_CBR_BOOST_PCT, args);
return update_extra_cfg(ctx, &extra_cfg);
}
@@ -1387,7 +1387,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{VP8E_SET_CQ_LEVEL, ctrl_set_cq_level},
{VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_rc_max_intra_bitrate_pct},
{VP8E_SET_MAX_INTER_BITRATE_PCT, ctrl_set_rc_max_inter_bitrate_pct},
- {VP8E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct},
+ {VP9E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct},
{VP9E_SET_LOSSLESS, ctrl_set_lossless},
{VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode},
{VP9E_SET_AQ_MODE, ctrl_set_aq_mode},
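
The vp9_cx_iface.c hunks fix a control-ID mismatch: gf_cbr_boost_pct is a VP9-side control, so both the CAST in the handler and the encoder_ctrl_maps[] entry now use VP9E_SET_GF_CBR_BOOST_PCT rather than the VP8E id. From the application side the control is set as in this sketch (assumes an initialized encoder context; the 25% boost value is illustrative):

#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

static vpx_codec_err_t enable_gf_cbr_boost(vpx_codec_ctx_t *ctx) {
  /* Allow golden frames a 25% bitrate boost over the average data rate
   * when running in CBR mode. */
  return vpx_codec_control(ctx, VP9E_SET_GF_CBR_BOOST_PCT, 25);
}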