diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 47 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 40 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 403 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_dct_ssse3.c | 83 | ||||
-rw-r--r-- | vp9/vp9_cx_iface.c | 4 |
5 files changed, 302 insertions, 275 deletions
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 046c64170..3f28e0ff3 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -60,6 +60,14 @@ #define RC_FACTOR_MIN 0.75 #define RC_FACTOR_MAX 1.75 + +#define INTRA_WEIGHT_EXPERIMENT 0 +#if INTRA_WEIGHT_EXPERIMENT +#define NCOUNT_INTRA_THRESH 8192 +#define NCOUNT_INTRA_FACTOR 3 +#define NCOUNT_FRAME_II_THRESH 5.0 +#endif + #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x) - 0.000001 : (x) + 0.000001) #if ARF_STATS_OUTPUT @@ -470,7 +478,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { int intercount = 0; int second_ref_count = 0; const int intrapenalty = INTRA_MODE_PENALTY; - int neutral_count = 0; + double neutral_count; int new_mv_count = 0; int sum_in_vectors = 0; MV lastmv = {0, 0}; @@ -503,6 +511,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { intra_factor = 0.0; brightness_factor = 0.0; + neutral_count = 0.0; set_first_pass_params(cpi); vp9_set_quantizer(cm, find_fp_qindex(cm->bit_depth)); @@ -818,12 +827,28 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { #endif if (motion_error <= this_error) { + vp9_clear_system_state(); + // Keep a count of cases where the inter and intra were very close // and very low. This helps with scene cut detection for example in // cropped clips with black bars at the sides or top and bottom. +#if INTRA_WEIGHT_EXPERIMENT + if (((this_error - intrapenalty) * 9 <= motion_error * 10) && + (this_error < (2 * intrapenalty))) { + neutral_count += 1.0; + // Also track cases where the intra is not much worse than the inter + // and use this in limiting the GF/arf group length. + } else if ((this_error > NCOUNT_INTRA_THRESH) && + (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) { + neutral_count += (double)motion_error / + DOUBLE_DIVIDE_CHECK((double)this_error); + } +#else if (((this_error - intrapenalty) * 9 <= motion_error * 10) && - this_error < 2 * intrapenalty) - ++neutral_count; + (this_error < (2 * intrapenalty))) { + neutral_count += 1.0; + } +#endif mv.row *= 8; mv.col *= 8; @@ -1260,17 +1285,27 @@ static double get_sr_decay_rate(const VP9_COMP *cpi, double sr_diff = (frame->sr_coded_error - frame->coded_error) / num_mbs; double sr_decay = 1.0; + double modified_pct_inter; + double modified_pcnt_intra; const double motion_amplitude_factor = frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2); - const double pcnt_intra = 100 * (1.0 - frame->pcnt_inter); + + modified_pct_inter = frame->pcnt_inter; +#if INTRA_WEIGHT_EXPERIMENT + if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) < + (double)NCOUNT_FRAME_II_THRESH) + modified_pct_inter = frame->pcnt_inter - frame->pcnt_neutral; +#endif + modified_pcnt_intra = 100 * (1.0 - modified_pct_inter); + if ((sr_diff > LOW_SR_DIFF_TRHESH)) { sr_diff = MIN(sr_diff, SR_DIFF_MAX); sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) - (MOTION_AMP_PART * motion_amplitude_factor) - - (INTRA_PART * pcnt_intra); + (INTRA_PART * modified_pcnt_intra); } - return MAX(sr_decay, MIN(DEFAULT_DECAY_LIMIT, frame->pcnt_inter)); + return MAX(sr_decay, MIN(DEFAULT_DECAY_LIMIT, modified_pct_inter)); } // This function gives an estimate of how badly we believe the prediction diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 12882e432..eb01bb279 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1783,9 +1783,8 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) { return (center - (bw >> 1)); } -static const MV search_pos[9] = { - {-1, -1}, {-1, 0}, {-1, 1}, {0, -1}, {0, 0}, {0, 1}, - {1, -1}, {1, 0}, {1, 1}, +static const MV search_pos[5] = { + {-1, 0}, {0, -1}, {0, 0}, {0, 1}, {1, 0}, }; unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, @@ -1804,7 +1803,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, const int ref_stride = xd->plane[0].pre[0].stride; uint8_t const *ref_buf, *src_buf; MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv; - int best_sad; + int best_sad, tmp_sad, this_sad[5]; MV this_mv; #if CONFIG_VP9_HIGHBITDEPTH @@ -1845,21 +1844,40 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, best_sad = INT_MAX; this_mv = *tmp_mv; - for (idx = 0; idx < 9; ++idx) { - int this_sad; - src_buf = x->plane[0].src.buf; + src_buf = x->plane[0].src.buf; + for (idx = 0; idx < 5; ++idx) { ref_buf = xd->plane[0].pre[0].buf + (search_pos[idx].row + this_mv.row) * ref_stride + (search_pos[idx].col + this_mv.col); - this_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, - ref_buf, ref_stride); - if (this_sad < best_sad) { - best_sad = this_sad; + this_sad[idx] = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, + ref_buf, ref_stride); + if (this_sad[idx] < best_sad) { + best_sad = this_sad[idx]; tmp_mv->row = search_pos[idx].row + this_mv.row; tmp_mv->col = search_pos[idx].col + this_mv.col; } } + + if (this_sad[0] < this_sad[4]) + this_mv.row -= 1; + else + this_mv.row += 1; + + if (this_sad[1] < this_sad[3]) + this_mv.col -= 1; + else + this_mv.col += 1; + + ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; + + tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, + ref_buf, ref_stride); + if (best_sad > tmp_sad) { + *tmp_mv = this_mv; + best_sad = tmp_sad; + } + tmp_mv->row *= 8; tmp_mv->col *= 8; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index c820651fb..88003ec17 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -217,6 +217,8 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, const uint32_t ac_quant = pd->dequant[1]; unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); + int skip_dc = 0; + *var_y = var; *sse_y = sse; @@ -258,6 +260,9 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, // Check if dc coefficient can be quantized to zero. if (sse_tx - var_tx < dc_thr || sse == var) x->skip_txfm[0] = 1; + } else { + if (sse_tx - var_tx < dc_thr || sse == var) + skip_dc = 1; } } @@ -267,21 +272,28 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, return; } + if (!skip_dc) { #if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], - dc_quant >> (xd->bd - 5), &rate, &dist); - } else { + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], + dc_quant >> (xd->bd - 5), &rate, &dist); + } else { + vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], + dc_quant >> 3, &rate, &dist); + } +#else vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], dc_quant >> 3, &rate, &dist); - } -#else - vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], - dc_quant >> 3, &rate, &dist); #endif // CONFIG_VP9_HIGHBITDEPTH + } - *out_rate_sum = rate >> 1; - *out_dist_sum = dist << 3; + if (!skip_dc) { + *out_rate_sum = rate >> 1; + *out_dist_sum = dist << 3; + } else { + *out_rate_sum = 0; + *out_dist_sum = (sse - var) << 4; + } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -590,13 +602,27 @@ void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, *rd_cost = best_rdc; } -static const PREDICTION_MODE inter_mode_set[INTER_MODES] = { - ZEROMV, NEARESTMV, NEARMV, NEWMV, -}; - static const int ref_frame_cost[MAX_REF_FRAMES] = { 1235, 229, 530, 615, }; + +typedef struct { + MV_REFERENCE_FRAME ref_frame; + PREDICTION_MODE pred_mode; +} REF_MODE; + +#define RT_INTER_MODES 8 +static const REF_MODE ref_mode_set[RT_INTER_MODES] = { + {LAST_FRAME, ZEROMV}, + {LAST_FRAME, NEARESTMV}, + {LAST_FRAME, NEARMV}, + {LAST_FRAME, NEWMV}, + {GOLDEN_FRAME, ZEROMV}, + {GOLDEN_FRAME, NEARESTMV}, + {GOLDEN_FRAME, NEARMV}, + {GOLDEN_FRAME, NEWMV} +}; + // TODO(jingning) placeholder for inter-frame non-RD mode decision. // this needs various further optimizations. to be continued.. void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -653,6 +679,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const int pixels_in_block = bh * bw; int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready; int ref_frame_skip_mask = 0; + int idx; if (reuse_inter_pred) { int i; @@ -736,9 +763,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->rc.frames_since_golden == 0) ref_frame_skip_mask |= (1 << GOLDEN_FRAME); - for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { - PREDICTION_MODE this_mode; - int i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; + for (idx = 0; idx < RT_INTER_MODES; ++idx) { + int rate_mv = 0; + int mode_rd_thresh; + int mode_index; + int i; + PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode; + + ref_frame = ref_mode_set[idx].ref_frame; + mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)]; + + i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; @@ -760,212 +795,194 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame[0] = ref_frame; set_ref_ptrs(cm, xd, ref_frame, NONE); - for (i = 0; i < INTER_MODES; ++i) { - int rate_mv = 0; - int mode_rd_thresh; - int mode_index; - this_mode = inter_mode_set[i]; - mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)]; + if (const_motion[ref_frame] && this_mode == NEARMV) + continue; - if (const_motion[ref_frame] && this_mode == NEARMV) - continue; + if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) + continue; - if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) - continue; + mode_rd_thresh = best_mode_skip_txfm ? + rd_threshes[mode_index] << 1 : rd_threshes[mode_index]; + if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, + rd_thresh_freq_fact[mode_index])) + continue; - mode_rd_thresh = best_mode_skip_txfm ? rd_threshes[mode_index] << 1 : - rd_threshes[mode_index]; - if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, - rd_thresh_freq_fact[mode_index])) + if (this_mode == NEWMV) { + if (cpi->sf.partition_search_type != VAR_BASED_PARTITION + && best_rdc.rdcost < (int64_t) (1 << num_pels_log2_lookup[bsize])) continue; + if (ref_frame > LAST_FRAME) { + int tmp_sad; + int dis, cost_list[5]; - if (this_mode == NEWMV) { - if (cpi->sf.partition_search_type != VAR_BASED_PARTITION && - best_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize])) + if (bsize < BLOCK_16X16) continue; - if (ref_frame > LAST_FRAME) { - int tmp_sad; - int dis, cost_list[5]; - - if (bsize < BLOCK_16X16) - continue; - - tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize); - if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) - continue; - - frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int; - rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv, - &mbmi->ref_mvs[ref_frame][0].as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - frame_mv[NEWMV][ref_frame].as_mv.row >>= 3; - frame_mv[NEWMV][ref_frame].as_mv.col >>= 3; - - cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv, - &mbmi->ref_mvs[ref_frame][0].as_mv, - cpi->common.allow_high_precision_mv, - x->errorperbit, - &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, - &x->pred_sse[ref_frame], NULL, 0, 0); - } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, - &frame_mv[NEWMV][ref_frame], - &rate_mv, best_rdc.rdcost)) { + tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize); + if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) continue; - } - } - if (this_mode != NEARESTMV && - frame_mv[this_mode][ref_frame].as_int == - frame_mv[NEARESTMV][ref_frame].as_int) + frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int; + rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv, + &mbmi->ref_mvs[ref_frame][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + frame_mv[NEWMV][ref_frame].as_mv.row >>= 3; + frame_mv[NEWMV][ref_frame].as_mv.col >>= 3; + + cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv, + &mbmi->ref_mvs[ref_frame][0].as_mv, + cpi->common.allow_high_precision_mv, + x->errorperbit, + &cpi->fn_ptr[bsize], + cpi->sf.mv.subpel_force_stop, + cpi->sf.mv.subpel_iters_per_step, + cond_cost_list(cpi, cost_list), + x->nmvjointcost, x->mvcost, &dis, + &x->pred_sse[ref_frame], NULL, 0, 0); + } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, + &frame_mv[NEWMV][ref_frame], &rate_mv, best_rdc.rdcost)) { continue; + } + } - mbmi->mode = this_mode; - mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + if (this_mode != NEARESTMV && frame_mv[this_mode][ref_frame].as_int == + frame_mv[NEARESTMV][ref_frame].as_int) + continue; - // Search for the best prediction filter type, when the resulting - // motion vector is at sub-pixel accuracy level for luma component, i.e., - // the last three bits are all zeros. - if (reuse_inter_pred) { - if (!this_mode_pred) { - this_mode_pred = &tmp[3]; - } else { - this_mode_pred = &tmp[get_pred_buffer(tmp, 3)]; - pd->dst.buf = this_mode_pred->data; - pd->dst.stride = bw; - } + mbmi->mode = this_mode; + mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + + // Search for the best prediction filter type, when the resulting + // motion vector is at sub-pixel accuracy level for luma component, i.e., + // the last three bits are all zeros. + if (reuse_inter_pred) { + if (!this_mode_pred) { + this_mode_pred = &tmp[3]; + } else { + this_mode_pred = &tmp[get_pred_buffer(tmp, 3)]; + pd->dst.buf = this_mode_pred->data; + pd->dst.stride = bw; } + } - if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && - pred_filter_search && (ref_frame == LAST_FRAME) && - ((mbmi->mv[0].as_mv.row & 0x07) != 0 || - (mbmi->mv[0].as_mv.col & 0x07) != 0)) { - int pf_rate[3]; - int64_t pf_dist[3]; - unsigned int pf_var[3]; - unsigned int pf_sse[3]; - TX_SIZE pf_tx_size[3]; - int64_t best_cost = INT64_MAX; - INTERP_FILTER best_filter = SWITCHABLE, filter; - PRED_BUFFER *current_pred = this_mode_pred; - - for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) { - int64_t cost; - mbmi->interp_filter = filter; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], - &pf_dist[filter], &pf_var[filter], &pf_sse[filter]); - pf_rate[filter] += vp9_get_switchable_rate(cpi, xd); - cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]); - pf_tx_size[filter] = mbmi->tx_size; - if (cost < best_cost) { - best_filter = filter; - best_cost = cost; - skip_txfm = x->skip_txfm[0]; - - if (reuse_inter_pred) { - if (this_mode_pred != current_pred) { - free_pred_buffer(this_mode_pred); - this_mode_pred = current_pred; - } - - if (filter < EIGHTTAP_SHARP) { - current_pred = &tmp[get_pred_buffer(tmp, 3)]; - pd->dst.buf = current_pred->data; - pd->dst.stride = bw; - } + if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search + && (ref_frame == LAST_FRAME) + && (((mbmi->mv[0].as_mv.row | mbmi->mv[0].as_mv.col) & 0x07) != 0)) { + int pf_rate[3]; + int64_t pf_dist[3]; + unsigned int pf_var[3]; + unsigned int pf_sse[3]; + TX_SIZE pf_tx_size[3]; + int64_t best_cost = INT64_MAX; + INTERP_FILTER best_filter = SWITCHABLE, filter; + PRED_BUFFER *current_pred = this_mode_pred; + + for (filter = EIGHTTAP; filter <= EIGHTTAP_SHARP; ++filter) { + int64_t cost; + mbmi->interp_filter = filter; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter], + &pf_var[filter], &pf_sse[filter]); + pf_rate[filter] += vp9_get_switchable_rate(cpi, xd); + cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]); + pf_tx_size[filter] = mbmi->tx_size; + if (cost < best_cost) { + best_filter = filter; + best_cost = cost; + skip_txfm = x->skip_txfm[0]; + + if (reuse_inter_pred) { + if (this_mode_pred != current_pred) { + free_pred_buffer(this_mode_pred); + this_mode_pred = current_pred; + } + + if (filter < EIGHTTAP_SHARP) { + current_pred = &tmp[get_pred_buffer(tmp, 3)]; + pd->dst.buf = current_pred->data; + pd->dst.stride = bw; } } } - - if (reuse_inter_pred && this_mode_pred != current_pred) - free_pred_buffer(current_pred); - - mbmi->interp_filter = best_filter; - mbmi->tx_size = pf_tx_size[mbmi->interp_filter]; - this_rdc.rate = pf_rate[mbmi->interp_filter]; - this_rdc.dist = pf_dist[mbmi->interp_filter]; - var_y = pf_var[mbmi->interp_filter]; - sse_y = pf_sse[mbmi->interp_filter]; - x->skip_txfm[0] = skip_txfm; - } else { - mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, - &var_y, &sse_y); - this_rdc.rate += cm->interp_filter == SWITCHABLE ? - vp9_get_switchable_rate(cpi, xd) : 0; } - // chroma component rate-distortion cost modeling - if (x->color_sensitivity[0] || x->color_sensitivity[1]) { - int uv_rate = 0; - int64_t uv_dist = 0; - if (x->color_sensitivity[0]) - vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1); - if (x->color_sensitivity[1]) - vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2); - model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist, - &var_y, &sse_y); - this_rdc.rate += uv_rate; - this_rdc.dist += uv_dist; - } + if (reuse_inter_pred && this_mode_pred != current_pred) + free_pred_buffer(current_pred); - this_rdc.rate += rate_mv; - this_rdc.rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]] - [INTER_OFFSET(this_mode)]; - this_rdc.rate += ref_frame_cost[ref_frame]; - this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, - this_rdc.rate, this_rdc.dist); + mbmi->interp_filter = best_filter; + mbmi->tx_size = pf_tx_size[mbmi->interp_filter]; + this_rdc.rate = pf_rate[mbmi->interp_filter]; + this_rdc.dist = pf_dist[mbmi->interp_filter]; + var_y = pf_var[mbmi->interp_filter]; + sse_y = pf_sse[mbmi->interp_filter]; + x->skip_txfm[0] = skip_txfm; + } else { + mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, + &var_y, &sse_y); + this_rdc.rate += + cm->interp_filter == SWITCHABLE ? + vp9_get_switchable_rate(cpi, xd) : 0; + } - // Skipping checking: test to see if this block can be reconstructed by - // prediction only. - if (cpi->allow_encode_breakout) { - encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, - this_mode, var_y, sse_y, yv12_mb, - &this_rdc.rate, &this_rdc.dist); - if (x->skip) { - this_rdc.rate += rate_mv; - this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, - this_rdc.rate, this_rdc.dist); - } + // chroma component rate-distortion cost modeling + if (x->color_sensitivity[0] || x->color_sensitivity[1]) { + int uv_rate = 0; + int64_t uv_dist = 0; + if (x->color_sensitivity[0]) + vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1); + if (x->color_sensitivity[1]) + vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2); + model_rd_for_sb_uv(cpi, bsize, x, xd, &uv_rate, &uv_dist, &var_y, &sse_y); + this_rdc.rate += uv_rate; + this_rdc.dist += uv_dist; + } + + this_rdc.rate += rate_mv; + this_rdc.rate += + cpi->inter_mode_cost[mbmi->mode_context[ref_frame]][INTER_OFFSET( + this_mode)]; + this_rdc.rate += ref_frame_cost[ref_frame]; + this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); + + // Skipping checking: test to see if this block can be reconstructed by + // prediction only. + if (cpi->allow_encode_breakout) { + encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, this_mode, + var_y, sse_y, yv12_mb, &this_rdc.rate, + &this_rdc.dist); + if (x->skip) { + this_rdc.rate += rate_mv; + this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, + this_rdc.dist); } + } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) - vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx); + if (cpi->oxcf.noise_sensitivity > 0) + vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx); #else - (void)ctx; + (void)ctx; #endif - if (this_rdc.rdcost < best_rdc.rdcost || x->skip) { - best_rdc = this_rdc; - best_mode = this_mode; - best_pred_filter = mbmi->interp_filter; - best_tx_size = mbmi->tx_size; - best_ref_frame = ref_frame; - best_mode_skip_txfm = x->skip_txfm[0]; - - if (reuse_inter_pred) { - free_pred_buffer(best_pred); - best_pred = this_mode_pred; - } - } else { - if (reuse_inter_pred) - free_pred_buffer(this_mode_pred); - } + if (this_rdc.rdcost < best_rdc.rdcost || x->skip) { + best_rdc = this_rdc; + best_mode = this_mode; + best_pred_filter = mbmi->interp_filter; + best_tx_size = mbmi->tx_size; + best_ref_frame = ref_frame; + best_mode_skip_txfm = x->skip_txfm[0]; - if (x->skip) - break; + if (reuse_inter_pred) { + free_pred_buffer(best_pred); + best_pred = this_mode_pred; + } + } else { + if (reuse_inter_pred) + free_pred_buffer(this_mode_pred); } - // Check that a prediction mode has been selected. - assert(best_rdc.rdcost < INT64_MAX); - if (x->skip) break; } diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c index 5c0ad7892..a1a2bda80 100644 --- a/vp9/encoder/x86/vp9_dct_ssse3.c +++ b/vp9/encoder/x86/vp9_dct_ssse3.c @@ -102,99 +102,56 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, const __m128i r2 = _mm_sub_epi16(q1, q2); const __m128i r3 = _mm_sub_epi16(q0, q3); // Interleave to do the multiply by constants which gets us into 32bits - const __m128i t0 = _mm_add_epi16(r0, r1); - const __m128i t1 = _mm_sub_epi16(r0, r1); + const __m128i t0 = _mm_unpacklo_epi16(r0, r1); + const __m128i t1 = _mm_unpackhi_epi16(r0, r1); const __m128i t2 = _mm_unpacklo_epi16(r2, r3); const __m128i t3 = _mm_unpackhi_epi16(r2, r3); - const __m128i u0 = _mm_mulhrs_epi16(t0, k__dual_p16_p16); - const __m128i u1 = _mm_mulhrs_epi16(t1, k__dual_p16_p16); + const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16); + const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16); + const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16); + const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16); + const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08); const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08); const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24); const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24); // dct_const_round_shift - const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); - const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); - const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); - const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); - const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); - const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS); - const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); - const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS); - // Combine - res0 = u0; - res4 = u1; - res2 = _mm_packs_epi32(w4, w5); - res6 = _mm_packs_epi32(w6, w7); - } - // Work on next four results - if (pass == 1) { - // Interleave to do the multiply by constants which gets us into 32bits - const __m128i d0 = _mm_unpacklo_epi16(q6, q5); - const __m128i d1 = _mm_unpackhi_epi16(q6, q5); - const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16); - const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16); - const __m128i e2 = _mm_madd_epi16(d0, k__cospi_p16_p16); - const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16); - // dct_const_round_shift - const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING); - const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING); - const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING); - const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING); - const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS); - const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS); - const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS); - const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS); - // Combine - const __m128i r0 = _mm_packs_epi32(s0, s1); - const __m128i r1 = _mm_packs_epi32(s2, s3); - // Add/subtract - const __m128i x0 = _mm_add_epi16(q4, r0); - const __m128i x1 = _mm_sub_epi16(q4, r0); - const __m128i x2 = _mm_sub_epi16(q7, r1); - const __m128i x3 = _mm_add_epi16(q7, r1); - // Interleave to do the multiply by constants which gets us into 32bits - const __m128i t0 = _mm_unpacklo_epi16(x0, x3); - const __m128i t1 = _mm_unpackhi_epi16(x0, x3); - const __m128i t2 = _mm_unpacklo_epi16(x1, x2); - const __m128i t3 = _mm_unpackhi_epi16(x1, x2); - const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04); - const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04); - const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28); - const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28); - const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20); - const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20); - const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12); - const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12); - // dct_const_round_shift + const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); + const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); + const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS); const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS); + const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS); const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS); // Combine - res1 = _mm_packs_epi32(w0, w1); - res7 = _mm_packs_epi32(w2, w3); - res5 = _mm_packs_epi32(w4, w5); - res3 = _mm_packs_epi32(w6, w7); - } else { + + res0 = _mm_packs_epi32(w0, w1); + res4 = _mm_packs_epi32(w2, w3); + res2 = _mm_packs_epi32(w4, w5); + res6 = _mm_packs_epi32(w6, w7); + } + // Work on next four results + { // Interleave to do the multiply by constants which gets us into 32bits const __m128i d0 = _mm_sub_epi16(q6, q5); const __m128i d1 = _mm_add_epi16(q6, q5); const __m128i r0 = _mm_mulhrs_epi16(d0, k__dual_p16_p16); const __m128i r1 = _mm_mulhrs_epi16(d1, k__dual_p16_p16); + // Add/subtract const __m128i x0 = _mm_add_epi16(q4, r0); const __m128i x1 = _mm_sub_epi16(q4, r0); diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index d1dd66b2c..fb6aac721 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -694,7 +694,7 @@ static vpx_codec_err_t ctrl_set_rc_gf_cbr_boost_pct( vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.gf_cbr_boost_pct = - CAST(VP8E_SET_GF_CBR_BOOST_PCT, args); + CAST(VP9E_SET_GF_CBR_BOOST_PCT, args); return update_extra_cfg(ctx, &extra_cfg); } @@ -1387,7 +1387,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { {VP8E_SET_CQ_LEVEL, ctrl_set_cq_level}, {VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_rc_max_intra_bitrate_pct}, {VP8E_SET_MAX_INTER_BITRATE_PCT, ctrl_set_rc_max_inter_bitrate_pct}, - {VP8E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct}, + {VP9E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct}, {VP9E_SET_LOSSLESS, ctrl_set_lossless}, {VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode}, {VP9E_SET_AQ_MODE, ctrl_set_aq_mode}, |