diff options
-rw-r--r-- | vp9/common/vp9_alloccommon.c | 62 | ||||
-rw-r--r-- | vp9/common/vp9_alloccommon.h | 3 | ||||
-rw-r--r-- | vp9/common/vp9_reconintra.c | 7 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodeframe.c | 3 | ||||
-rw-r--r-- | vp9/encoder/vp9_avg.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_bitstream.c | 8 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 197 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodemb.c | 12 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 138 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 19 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 61 | ||||
-rw-r--r-- | vp9/encoder/vp9_mbgraph.c | 2 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 146 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 7 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 9 | ||||
-rw-r--r-- | vp9/encoder/vp9_quantize.c | 49 | ||||
-rw-r--r-- | vp9/encoder/vp9_quantize.h | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.c | 11 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.h | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 20 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 4 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_avg_intrin_sse2.c | 15 | ||||
-rw-r--r-- | vp9/vp9_dx_iface.c | 3 |
23 files changed, 424 insertions, 368 deletions
diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 8b04d1b43..e209788c3 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -141,68 +141,6 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { return 1; } -static void init_frame_bufs(VP9_COMMON *cm) { - BufferPool *const pool = cm->buffer_pool; - int i; - - cm->new_fb_idx = FRAME_BUFFERS - 1; - pool->frame_bufs[cm->new_fb_idx].ref_count = 1; - - for (i = 0; i < REF_FRAMES; ++i) { - cm->ref_frame_map[i] = i; - pool->frame_bufs[i].ref_count = 1; - } -} - -int vp9_alloc_ref_frame_buffers(VP9_COMMON *cm, int width, int height) { - int i; - const int ss_x = cm->subsampling_x; - const int ss_y = cm->subsampling_y; - - vp9_free_ref_frame_buffers(cm); - - for (i = 0; i < FRAME_BUFFERS; ++i) { - BufferPool *const pool = cm->buffer_pool; - pool->frame_bufs[i].ref_count = 0; - if (vp9_alloc_frame_buffer(&pool->frame_bufs[i].buf, width, height, - ss_x, ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, - cm->byte_alignment) < 0) - goto fail; - if (pool->frame_bufs[i].mvs == NULL) { - pool->frame_bufs[i].mvs = - (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, - sizeof(*pool->frame_bufs[i].mvs)); - if (pool->frame_bufs[i].mvs == NULL) - goto fail; - - pool->frame_bufs[i].mi_rows = cm->mi_rows; - pool->frame_bufs[i].mi_cols = cm->mi_cols; - } - } - - init_frame_bufs(cm); - -#if CONFIG_VP9_POSTPROC - if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, width, height, ss_x, ss_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, - cm->byte_alignment) < 0) - goto fail; -#endif - - return 0; - - fail: - vp9_free_ref_frame_buffers(cm); - return 1; -} - void vp9_remove_common(VP9_COMMON *cm) { vp9_free_ref_frame_buffers(cm); vp9_free_context_buffers(cm); diff --git a/vp9/common/vp9_alloccommon.h b/vp9/common/vp9_alloccommon.h index 09da74e49..d82397fa3 100644 --- a/vp9/common/vp9_alloccommon.h +++ b/vp9/common/vp9_alloccommon.h @@ -12,6 +12,8 @@ #ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_ #define VP9_COMMON_VP9_ALLOCCOMMON_H_ +#define INVALID_IDX -1 // Invalid buffer index. + #ifdef __cplusplus extern "C" { #endif @@ -24,7 +26,6 @@ int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); void vp9_init_context_buffers(struct VP9Common *cm); void vp9_free_context_buffers(struct VP9Common *cm); -int vp9_alloc_ref_frame_buffers(struct VP9Common *cm, int width, int height); void vp9_free_ref_frame_buffers(struct VP9Common *cm); int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index e614e6da8..1668b99ce 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -12,6 +12,7 @@ #include "./vp9_rtcd.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/vpx_once.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_onyxc_int.h" @@ -579,7 +580,7 @@ static intra_high_pred_fn pred_high[INTRA_MODES][4]; static intra_high_pred_fn dc_pred_high[2][2][4]; #endif // CONFIG_VP9_HIGHBITDEPTH -void vp9_init_intra_predictors() { +static void vp9_init_intra_predictors_internal(void) { #define INIT_ALL_SIZES(p, type) \ p[TX_4X4] = vp9_##type##_predictor_4x4; \ p[TX_8X8] = vp9_##type##_predictor_8x8; \ @@ -894,3 +895,7 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, have_top, have_left, have_right, x, y, plane); } + +void vp9_init_intra_predictors() { + once(vp9_init_intra_predictors_internal); +} diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index d345a0578..c183cf38e 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -934,7 +934,6 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, winterface->sync(&pbi->lf_worker); vp9_loop_filter_data_reset(lf_data, get_frame_new_buffer(cm), cm, pbi->mb.plane); - vp9_loop_filter_frame_init(cm, cm->lf.filter_level); } assert(tile_rows <= 4); @@ -1362,7 +1361,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1; for (i = 0; i < REFS_PER_FRAME; ++i) { - cm->frame_refs[i].idx = -1; + cm->frame_refs[i].idx = INVALID_IDX; cm->frame_refs[i].buf = NULL; } diff --git a/vp9/encoder/vp9_avg.c b/vp9/encoder/vp9_avg.c index 50c8bca0b..90d113c32 100644 --- a/vp9/encoder/vp9_avg.c +++ b/vp9/encoder/vp9_avg.c @@ -32,12 +32,13 @@ unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) { void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height) { int idx; + const int norm_factor = MAX(8, height >> 1); for (idx = 0; idx < 16; ++idx) { int i; hbuf[idx] = 0; for (i = 0; i < height; ++i) hbuf[idx] += ref[i * ref_stride]; - hbuf[idx] /= 32; + hbuf[idx] /= norm_factor; ++ref; } } @@ -45,9 +46,10 @@ void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref, int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) { int idx; int16_t sum = 0; + const int norm_factor = MAX(8, width >> 1); for (idx = 0; idx < width; ++idx) sum += ref[idx]; - return sum / 32; + return sum / norm_factor; } int vp9_vector_var_c(int16_t const *ref, int16_t const *src, diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 3a70364ae..b24fe2950 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -989,8 +989,6 @@ static void write_frame_size_with_refs(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame); - found = cm->width == cfg->y_crop_width && - cm->height == cfg->y_crop_height; // Set "found" to 0 for temporal svc and for spatial svc key frame if (cpi->use_svc && @@ -1003,6 +1001,9 @@ static void write_frame_size_with_refs(VP9_COMP *cpi, cpi->svc.layer_context[0].frames_from_key_frame < cpi->svc.number_temporal_layers + 1))) { found = 0; + } else if (cfg != NULL) { + found = cm->width == cfg->y_crop_width && + cm->height == cfg->y_crop_height; } vp9_wb_write_bit(wb, found); if (found) { @@ -1114,7 +1115,8 @@ static void write_uncompressed_header(VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame; vp9_wb_write_literal(wb, get_refresh_mask(cpi), REF_FRAMES); for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - vp9_wb_write_literal(wb, get_ref_frame_idx(cpi, ref_frame), + assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX); + vp9_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame), REF_FRAMES_LOG2); vp9_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]); } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 63ca2d34d..a86981a71 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -144,12 +144,14 @@ static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi, const struct buf_2d *ref, int mi_row, int mi_col, BLOCK_SIZE bs) { + unsigned int sse, var; + uint8_t *last_y; const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME); - const uint8_t* last_y = &last->y_buffer[mi_row * MI_SIZE * last->y_stride + - mi_col * MI_SIZE]; - unsigned int sse; - const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, - last_y, last->y_stride, &sse); + + assert(last != NULL); + last_y = + &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE]; + var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse); return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); } @@ -518,156 +520,8 @@ void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) { #define GLOBAL_MOTION 1 #endif -#if GLOBAL_MOTION -static int vector_match(int16_t *ref, int16_t *src, int bwl) { - int best_sad = INT_MAX; - int this_sad; - int d; - int center, offset = 0; - int bw = 4 << bwl; // redundant variable, to be changed in the experiments. - for (d = 0; d <= bw; d += 16) { - this_sad = vp9_vector_var(&ref[d], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - offset = d; - } - } - center = offset; - - for (d = -8; d <= 8; d += 16) { - int this_pos = offset + d; - // check limit - if (this_pos < 0 || this_pos > bw) - continue; - this_sad = vp9_vector_var(&ref[this_pos], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - center = this_pos; - } - } - offset = center; - - for (d = -4; d <= 4; d += 8) { - int this_pos = offset + d; - // check limit - if (this_pos < 0 || this_pos > bw) - continue; - this_sad = vp9_vector_var(&ref[this_pos], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - center = this_pos; - } - } - offset = center; - - for (d = -2; d <= 2; d += 4) { - int this_pos = offset + d; - // check limit - if (this_pos < 0 || this_pos > bw) - continue; - this_sad = vp9_vector_var(&ref[this_pos], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - center = this_pos; - } - } - offset = center; - - for (d = -1; d <= 1; d += 2) { - int this_pos = offset + d; - // check limit - if (this_pos < 0 || this_pos > bw) - continue; - this_sad = vp9_vector_var(&ref[this_pos], src, bwl); - if (this_sad < best_sad) { - best_sad = this_sad; - center = this_pos; - } - } - - return (center - (bw >> 1)); -} - -static const MV search_pos[9] = { - {-1, -1}, {-1, 0}, {-1, 1}, {0, -1}, {0, 0}, {0, 1}, - {1, -1}, {1, 0}, {1, 1}, -}; - -static void motion_estimation(VP9_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize) { - MACROBLOCKD *xd = &x->e_mbd; - DECLARE_ALIGNED(16, int16_t, hbuf[128]); - DECLARE_ALIGNED(16, int16_t, vbuf[128]); - DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); - DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); - int idx; - const int bw = 4 << b_width_log2_lookup[bsize]; - const int bh = 4 << b_height_log2_lookup[bsize]; - const int search_width = bw << 1; - const int search_height = bh << 1; - const int src_stride = x->plane[0].src.stride; - const int ref_stride = xd->plane[0].pre[0].stride; - uint8_t const *ref_buf, *src_buf; - MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv; - int best_sad; - MV this_mv; - - // Set up prediction 1-D reference set - ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); - for (idx = 0; idx < search_width; idx += 16) { - vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); - ref_buf += 16; - } - - ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; - for (idx = 0; idx < search_height; ++idx) { - vbuf[idx] = vp9_int_pro_col(ref_buf, bw); - ref_buf += ref_stride; - } - - // Set up src 1-D reference set - for (idx = 0; idx < bw; idx += 16) { - src_buf = x->plane[0].src.buf + idx; - vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); - } - - src_buf = x->plane[0].src.buf; - for (idx = 0; idx < bh; ++idx) { - src_vbuf[idx] = vp9_int_pro_col(src_buf, bw); - src_buf += src_stride; - } - - // Find the best match per 1-D search - tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]); - tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]); - - best_sad = INT_MAX; - this_mv = *tmp_mv; - for (idx = 0; idx < 9; ++idx) { - int this_sad; - src_buf = x->plane[0].src.buf; - ref_buf = xd->plane[0].pre[0].buf + - (search_pos[idx].row + this_mv.row) * ref_stride + - (search_pos[idx].col + this_mv.col); - - this_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, - ref_buf, ref_stride); - if (this_sad < best_sad) { - best_sad = this_sad; - tmp_mv->row = search_pos[idx].row + this_mv.row; - tmp_mv->col = search_pos[idx].col + this_mv.col; - } - } - - tmp_mv->row *= 8; - tmp_mv->col *= 8; - - x->pred_mv[LAST_FRAME] = *tmp_mv; -} -#endif - // This function chooses partitioning based on the variance between source and -// reconstructed last, where variance is computed for downs-sampled inputs. +// reconstructed last, where variance is computed for down-sampled inputs. static void choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *x, @@ -682,7 +536,6 @@ static void choose_partitioning(VP9_COMP *cpi, int sp; int dp; int pixels_wide = 64, pixels_high = 64; - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); // Always use 4x4 partition for key frame. const int is_key_frame = (cm->frame_type == KEY_FRAME); @@ -709,7 +562,13 @@ static void choose_partitioning(VP9_COMP *cpi, if (!is_key_frame) { MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; - unsigned int var = 0, sse; + unsigned int uv_sad; +#if GLOBAL_MOTION + unsigned int y_sad; + BLOCK_SIZE bsize; +#endif + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); + assert(yv12 != NULL); vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, &cm->frame_refs[LAST_FRAME - 1].sf); mbmi->ref_frame[0] = LAST_FRAME; @@ -719,7 +578,16 @@ static void choose_partitioning(VP9_COMP *cpi, mbmi->interp_filter = BILINEAR; #if GLOBAL_MOTION - motion_estimation(cpi, x, BLOCK_64X64); + if (mi_row + 4 < cm->mi_rows && mi_col + 4 < cm->mi_cols) + bsize = BLOCK_64X64; + else if (mi_row + 4 < cm->mi_rows && mi_col + 4 >= cm->mi_cols) + bsize = BLOCK_32X64; + else if (mi_row + 4 >= cm->mi_rows && mi_col + 4 < cm->mi_cols) + bsize = BLOCK_64X32; + else + bsize = BLOCK_32X32; + + y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize); #endif vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); @@ -728,10 +596,14 @@ static void choose_partitioning(VP9_COMP *cpi, struct macroblock_plane *p = &x->plane[i]; struct macroblockd_plane *pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(BLOCK_64X64, pd); - var += cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, &sse); - if (sse > 2048) - x->color_sensitivity[i - 1] = 1; + uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride); + +#if GLOBAL_MOTION + x->color_sensitivity[i - 1] = uv_sad * 4 > y_sad; +#else + x->color_sensitivity[i - 1] = (uv_sad > 512); +#endif } d = xd->plane[0].dst.buf; @@ -3897,7 +3769,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { // Special case: set prev_mi to NULL when the previous mode info // context cannot be used. cm->prev_mi = cm->use_prev_frame_mvs ? - cm->prev_mip + cm->mi_stride + 1 : NULL; + cm->prev_mip + cm->mi_stride + 1 : NULL; x->quant_fp = cpi->sf.use_quant_fp; vp9_zero(x->skip_txfm); @@ -4169,6 +4041,7 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, for (ref = 0; ref < 1 + is_compound; ++ref) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]); + assert(cfg != NULL); vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 70b804e31..65e299793 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -476,19 +476,19 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, break; case TX_16X16: vp9_highbd_fdct16x16_1(src_diff, coeff, diff_stride); - vp9_highbd_quantize_dc(coeff, x->skip_block, p->round, + vp9_highbd_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; case TX_8X8: vp9_highbd_fdct8x8_1(src_diff, coeff, diff_stride); - vp9_highbd_quantize_dc(coeff, x->skip_block, p->round, + vp9_highbd_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; case TX_4X4: x->fwd_txm4x4(src_diff, coeff, diff_stride); - vp9_highbd_quantize_dc(coeff, x->skip_block, p->round, + vp9_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; @@ -508,19 +508,19 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, break; case TX_16X16: vp9_fdct16x16_1(src_diff, coeff, diff_stride); - vp9_quantize_dc(coeff, x->skip_block, p->round, + vp9_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; case TX_8X8: vp9_fdct8x8_1(src_diff, coeff, diff_stride); - vp9_quantize_dc(coeff, x->skip_block, p->round, + vp9_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; case TX_4X4: x->fwd_txm4x4(src_diff, coeff, diff_stride); - vp9_quantize_dc(coeff, x->skip_block, p->round, + vp9_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index bd7e0b64c..249c2363d 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -483,6 +483,7 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate lag buffers"); + // TODO(agrange) Check if ARF is enabled and skip allocation if not. if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height, cm->subsampling_x, cm->subsampling_y, @@ -495,13 +496,6 @@ static void alloc_raw_frame_buffers(VP9_COMP *cpi) { "Failed to allocate altref buffer"); } -static void alloc_ref_frame_buffers(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if (vp9_alloc_ref_frame_buffers(cm, cm->width, cm->height)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to allocate frame buffers"); -} - static void alloc_util_frame_buffers(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (vp9_realloc_frame_buffer(&cpi->last_frame_uf, @@ -2483,6 +2477,21 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vp9_extend_frame_inner_borders(cm->frame_to_show); } +static INLINE void alloc_frame_mvs(const VP9_COMMON *cm, + int buffer_idx) { + RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx]; + if (new_fb_ptr->mvs == NULL || + new_fb_ptr->mi_rows < cm->mi_rows || + new_fb_ptr->mi_cols < cm->mi_cols) { + vpx_free(new_fb_ptr->mvs); + new_fb_ptr->mvs = + (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*new_fb_ptr->mvs)); + new_fb_ptr->mi_rows = cm->mi_rows; + new_fb_ptr->mi_cols = cm->mi_cols; + } +} + void vp9_scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MV_REFERENCE_FRAME ref_frame; @@ -2491,13 +2500,19 @@ void vp9_scale_references(VP9_COMP *cpi) { for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1). if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) { - const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - BufferPool *const pool = cm->buffer_pool; - const YV12_BUFFER_CONFIG *const ref = &pool->frame_bufs[idx].buf; + BufferPool *const pool = cm->buffer_pool; + const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, + ref_frame); + + if (ref == NULL) { + cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX; + continue; + } #if CONFIG_VP9_HIGHBITDEPTH if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { const int new_fb = get_free_fb(cm); + RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb]; cm->cur_frame = &pool->frame_bufs[new_fb]; vp9_realloc_frame_buffer(&pool->frame_bufs[new_fb].buf, cm->width, cm->height, @@ -2505,35 +2520,28 @@ void vp9_scale_references(VP9_COMP *cpi) { cm->use_highbitdepth, VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL); - scale_and_extend_frame(ref, &pool->frame_bufs[new_fb].buf, - (int)cm->bit_depth); + scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth); #else if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { const int new_fb = get_free_fb(cm); - vp9_realloc_frame_buffer(&pool->frame_bufs[new_fb].buf, + RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb]; + vp9_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL); - scale_and_extend_frame(ref, &pool->frame_bufs[new_fb].buf); + scale_and_extend_frame(ref, &new_fb_ptr->buf); #endif // CONFIG_VP9_HIGHBITDEPTH cpi->scaled_ref_idx[ref_frame - 1] = new_fb; - if (pool->frame_bufs[new_fb].mvs == NULL || - pool->frame_bufs[new_fb].mi_rows < cm->mi_rows || - pool->frame_bufs[new_fb].mi_cols < cm->mi_cols) { - vpx_free(pool->frame_bufs[new_fb].mvs); - pool->frame_bufs[new_fb].mvs = - (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, - sizeof(*pool->frame_bufs[new_fb].mvs)); - pool->frame_bufs[new_fb].mi_rows = cm->mi_rows; - pool->frame_bufs[new_fb].mi_cols = cm->mi_cols; - } + + alloc_frame_mvs(cm, new_fb); } else { - cpi->scaled_ref_idx[ref_frame - 1] = idx; - ++pool->frame_bufs[idx].ref_count; + const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); + cpi->scaled_ref_idx[ref_frame - 1] = buf_idx; + ++pool->frame_bufs[buf_idx].ref_count; } } else { - cpi->scaled_ref_idx[ref_frame - 1] = INVALID_REF_BUFFER_IDX; + cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX; } } } @@ -2543,11 +2551,11 @@ static void release_scaled_references(VP9_COMP *cpi) { int i; for (i = 0; i < MAX_REF_FRAMES; ++i) { const int idx = cpi->scaled_ref_idx[i]; - RefCntBuffer *const buf = idx != INVALID_REF_BUFFER_IDX ? + RefCntBuffer *const buf = idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL; if (buf != NULL) { --buf->ref_count; - cpi->scaled_ref_idx[i] = INVALID_REF_BUFFER_IDX; + cpi->scaled_ref_idx[i] = INVALID_IDX; } } } @@ -2751,6 +2759,8 @@ void set_frame_size(VP9_COMP *cpi) { vp9_set_target_rate(cpi); } + alloc_frame_mvs(cm, cm->new_fb_idx); + // Reset the frame pointers to the current frame size. vp9_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height, @@ -2765,24 +2775,30 @@ void set_frame_size(VP9_COMP *cpi) { init_motion_estimation(cpi); for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; - YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[idx].buf; RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1]; - ref_buf->buf = buf; - ref_buf->idx = idx; + const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); + + ref_buf->idx = buf_idx; + + if (buf_idx != INVALID_IDX) { + YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf; + ref_buf->buf = buf; #if CONFIG_VP9_HIGHBITDEPTH - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - buf->y_crop_width, buf->y_crop_height, - cm->width, cm->height, - (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? - 1 : 0); + vp9_setup_scale_factors_for_frame(&ref_buf->sf, + buf->y_crop_width, buf->y_crop_height, + cm->width, cm->height, + (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? + 1 : 0); #else - vp9_setup_scale_factors_for_frame(&ref_buf->sf, - buf->y_crop_width, buf->y_crop_height, - cm->width, cm->height); + vp9_setup_scale_factors_for_frame(&ref_buf->sf, + buf->y_crop_width, buf->y_crop_height, + cm->width, cm->height); #endif // CONFIG_VP9_HIGHBITDEPTH - if (vp9_is_scaled(&ref_buf->sf)) - vp9_extend_frame_borders(buf); + if (vp9_is_scaled(&ref_buf->sf)) + vp9_extend_frame_borders(buf); + } else { + ref_buf->buf = NULL; + } } set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); @@ -3448,6 +3464,16 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, vp9_twopass_postencode_update(cpi); } +static void init_ref_frame_bufs(VP9_COMMON *cm) { + int i; + BufferPool *const pool = cm->buffer_pool; + cm->new_fb_idx = INVALID_IDX; + for (i = 0; i < REF_FRAMES; ++i) { + cm->ref_frame_map[i] = INVALID_IDX; + pool->frame_bufs[i].ref_count = 0; + } +} + static void check_initial_width(VP9_COMP *cpi, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, @@ -3468,7 +3494,7 @@ static void check_initial_width(VP9_COMP *cpi, #endif alloc_raw_frame_buffers(cpi); - alloc_ref_frame_buffers(cpi); + init_ref_frame_bufs(cm); alloc_util_frame_buffers(cpi); init_motion_estimation(cpi); // TODO(agrange) This can be removed. @@ -3793,8 +3819,14 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Find a free buffer for the new frame, releasing the reference previously // held. - pool->frame_bufs[cm->new_fb_idx].ref_count--; + if (cm->new_fb_idx != INVALID_IDX) { + --pool->frame_bufs[cm->new_fb_idx].ref_count; + } cm->new_fb_idx = get_free_fb(cm); + + if (cm->new_fb_idx == INVALID_IDX) + return -1; + cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; if (!cpi->use_svc && cpi->multi_arf_allowed) { @@ -3821,7 +3853,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } for (i = 0; i < MAX_REF_FRAMES; ++i) - cpi->scaled_ref_idx[i] = INVALID_REF_BUFFER_IDX; + cpi->scaled_ref_idx[i] = INVALID_IDX; if (oxcf->pass == 1 && (!cpi->use_svc || is_two_pass_svc(cpi))) { @@ -3907,8 +3939,18 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, PSNR_STATS psnr2; double frame_ssim2 = 0, weight = 0; #if CONFIG_VP9_POSTPROC - // TODO(agrange) Add resizing of post-proc buffer in here when the - // encoder is changed to use on-demand buffer allocation. + if (vp9_alloc_frame_buffer(&cm->post_proc_buffer, + recon->y_crop_width, recon->y_crop_height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_ENC_BORDER_IN_PIXELS, + cm->byte_alignment) < 0) { + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate post processing buffer"); + } + vp9_deblock(cm->frame_to_show, &cm->post_proc_buffer, cm->lf.filter_level * 10 / 6); #endif diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 69edfded1..c3679ca0b 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -17,6 +17,7 @@ #include "vpx/internal/vpx_codec_internal.h" #include "vpx/vp8cx.h" +#include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_ppflags.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_thread_common.h" @@ -47,7 +48,6 @@ extern "C" { #endif #define DEFAULT_GF_INTERVAL 10 -#define INVALID_REF_BUFFER_IDX -1 // Marks an invalid reference buffer id. typedef struct { int nmvjointcost[MV_JOINTS]; @@ -517,8 +517,8 @@ static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) { (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); } -static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { +static INLINE int get_ref_frame_map_idx(const VP9_COMP *cpi, + MV_REFERENCE_FRAME ref_frame) { if (ref_frame == LAST_FRAME) { return cpi->lst_fb_idx; } else if (ref_frame == GOLDEN_FRAME) { @@ -528,12 +528,19 @@ static INLINE int get_ref_frame_idx(const VP9_COMP *cpi, } } +static INLINE int get_ref_frame_buf_idx(const VP9_COMP *const cpi, + int ref_frame) { + const VP9_COMMON *const cm = &cpi->common; + const int map_idx = get_ref_frame_map_idx(cpi, ref_frame); + return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX; +} + static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { VP9_COMMON *const cm = &cpi->common; - BufferPool *const pool = cm->buffer_pool; - return &pool->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]] - .buf; + const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); + return + buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL; } static INLINE int get_token_alloc(int mb_rows, int mb_cols) { diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index b813e7de0..6c8bbdb6d 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -66,12 +66,6 @@ unsigned int arf_count = 0; #endif -static void swap_yv12(YV12_BUFFER_CONFIG *a, YV12_BUFFER_CONFIG *b) { - YV12_BUFFER_CONFIG temp = *a; - *a = *b; - *b = temp; -} - // Resets the first pass file to the given position using a relative seek from // the current position. static void reset_fpf_position(TWO_PASS *p, @@ -465,12 +459,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { int i; int recon_yoffset, recon_uvoffset; - YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); - YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); - int recon_y_stride = lst_yv12->y_stride; - int recon_uv_stride = lst_yv12->uv_stride; - int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height); int64_t intra_error = 0; int64_t coded_error = 0; int64_t sr_coded_error = 0; @@ -488,11 +476,22 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { MV lastmv = {0, 0}; TWO_PASS *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; + int recon_y_stride, recon_uv_stride, uv_mb_height; + + YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); + YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; + LAYER_CONTEXT *const lc = is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id] : NULL; double intra_factor; double brightness_factor; + BufferPool *const pool = cm->buffer_pool; + + // First pass code requires valid last and new frame buffers. + assert(new_yv12 != NULL); + assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL)); #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { @@ -537,21 +536,14 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { } if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - BufferPool *const pool = cm->buffer_pool; - const int ref_idx = - cm->ref_frame_map[get_ref_frame_idx(cpi, GOLDEN_FRAME)]; - const int scaled_idx = cpi->scaled_ref_idx[GOLDEN_FRAME - 1]; - - gld_yv12 = (scaled_idx != ref_idx) ? &pool->frame_bufs[scaled_idx].buf : - get_ref_frame_buffer(cpi, GOLDEN_FRAME); + gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME); + if (gld_yv12 == NULL) { + gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + } } else { gld_yv12 = NULL; } - recon_y_stride = new_yv12->y_stride; - recon_uv_stride = new_yv12->uv_stride; - uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height); - set_ref_ptrs(cm, xd, (cpi->ref_frame_flags & VP9_LAST_FLAG) ? LAST_FRAME: NONE, (cpi->ref_frame_flags & VP9_GOLD_FLAG) ? GOLDEN_FRAME : NONE); @@ -563,9 +555,12 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); vp9_setup_src_planes(x, cpi->Source, 0, 0); - vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); vp9_setup_dst_planes(xd->plane, new_yv12, 0, 0); + if (!frame_is_intra_only(cm)) { + vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); + } + xd->mi = cm->mi; xd->mi[0].src_mi = &xd->mi[0]; @@ -585,6 +580,10 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // Tiling is ignored in the first pass. vp9_tile_init(&tile, cm, 0, 0); + recon_y_stride = new_yv12->y_stride; + recon_uv_stride = new_yv12->uv_stride; + uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height); + for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { MV best_ref_mv = {0, 0}; @@ -1020,7 +1019,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { ((twopass->this_frame_stats.intra_error / DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) { if (gld_yv12 != NULL) { - vp8_yv12_copy_frame(lst_yv12, gld_yv12); + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], + cm->ref_frame_map[cpi->lst_fb_idx]); } twopass->sr_update_lag = 1; } else { @@ -1032,14 +1032,17 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { if (lc != NULL) { vp9_update_reference_frames(cpi); } else { - // Swap frame pointers so last frame refers to the frame we just compressed. - swap_yv12(lst_yv12, new_yv12); + // The frame we just compressed now becomes the last frame. + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], + cm->new_fb_idx); } // Special case for the first frame. Copy into the GF buffer as a second // reference. - if (cm->current_video_frame == 0 && gld_yv12 != NULL && lc == NULL) { - vp8_yv12_copy_frame(lst_yv12, gld_yv12); + if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX && + lc == NULL) { + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], + cm->ref_frame_map[cpi->lst_fb_idx]); } // Use this to see what the first pass reconstruction looks like. diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 18a8c72c4..b3a8df924 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -376,6 +376,8 @@ void vp9_update_mbgraph_stats(VP9_COMP *cpi) { int i, n_frames = vp9_lookahead_depth(cpi->lookahead); YV12_BUFFER_CONFIG *golden_ref = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + assert(golden_ref != NULL); + // we need to look ahead beyond where the ARF transitions into // being a GF - so exit if we don't look ahead beyond that if (n_frames <= cpi->rc.frames_till_gf_update_due) diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 159e0fc0c..c49a8bef3 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1714,6 +1714,152 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, return bestsad; } +static int vector_match(int16_t *ref, int16_t *src, int bwl) { + int best_sad = INT_MAX; + int this_sad; + int d; + int center, offset = 0; + int bw = 4 << bwl; // redundant variable, to be changed in the experiments. + for (d = 0; d <= bw; d += 16) { + this_sad = vp9_vector_var(&ref[d], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + offset = d; + } + } + center = offset; + + for (d = -8; d <= 8; d += 16) { + int this_pos = offset + d; + // check limit + if (this_pos < 0 || this_pos > bw) + continue; + this_sad = vp9_vector_var(&ref[this_pos], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + center = this_pos; + } + } + offset = center; + + for (d = -4; d <= 4; d += 8) { + int this_pos = offset + d; + // check limit + if (this_pos < 0 || this_pos > bw) + continue; + this_sad = vp9_vector_var(&ref[this_pos], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + center = this_pos; + } + } + offset = center; + + for (d = -2; d <= 2; d += 4) { + int this_pos = offset + d; + // check limit + if (this_pos < 0 || this_pos > bw) + continue; + this_sad = vp9_vector_var(&ref[this_pos], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + center = this_pos; + } + } + offset = center; + + for (d = -1; d <= 1; d += 2) { + int this_pos = offset + d; + // check limit + if (this_pos < 0 || this_pos > bw) + continue; + this_sad = vp9_vector_var(&ref[this_pos], src, bwl); + if (this_sad < best_sad) { + best_sad = this_sad; + center = this_pos; + } + } + + return (center - (bw >> 1)); +} + +static const MV search_pos[9] = { + {-1, -1}, {-1, 0}, {-1, 1}, {0, -1}, {0, 0}, {0, 1}, + {1, -1}, {1, 0}, {1, 1}, +}; + +unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize) { + MACROBLOCKD *xd = &x->e_mbd; + DECLARE_ALIGNED(16, int16_t, hbuf[128]); + DECLARE_ALIGNED(16, int16_t, vbuf[128]); + DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); + DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); + int idx; + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + const int search_width = bw << 1; + const int search_height = bh << 1; + const int src_stride = x->plane[0].src.stride; + const int ref_stride = xd->plane[0].pre[0].stride; + uint8_t const *ref_buf, *src_buf; + MV *tmp_mv = &xd->mi[0].src_mi->mbmi.mv[0].as_mv; + int best_sad; + MV this_mv; + + // Set up prediction 1-D reference set + ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); + for (idx = 0; idx < search_width; idx += 16) { + vp9_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); + ref_buf += 16; + } + + ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; + for (idx = 0; idx < search_height; ++idx) { + vbuf[idx] = vp9_int_pro_col(ref_buf, bw); + ref_buf += ref_stride; + } + + // Set up src 1-D reference set + for (idx = 0; idx < bw; idx += 16) { + src_buf = x->plane[0].src.buf + idx; + vp9_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); + } + + src_buf = x->plane[0].src.buf; + for (idx = 0; idx < bh; ++idx) { + src_vbuf[idx] = vp9_int_pro_col(src_buf, bw); + src_buf += src_stride; + } + + // Find the best match per 1-D search + tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]); + tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]); + + best_sad = INT_MAX; + this_mv = *tmp_mv; + for (idx = 0; idx < 9; ++idx) { + int this_sad; + src_buf = x->plane[0].src.buf; + ref_buf = xd->plane[0].pre[0].buf + + (search_pos[idx].row + this_mv.row) * ref_stride + + (search_pos[idx].col + this_mv.col); + + this_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, + ref_buf, ref_stride); + if (this_sad < best_sad) { + best_sad = this_sad; + tmp_mv->row = search_pos[idx].row + this_mv.row; + tmp_mv->col = search_pos[idx].col + this_mv.col; + } + } + tmp_mv->row *= 8; + tmp_mv->col *= 8; + x->pred_mv[LAST_FRAME] = *tmp_mv; + + return best_sad; +} + /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index bba48fd6e..dd8a46079 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -72,7 +72,7 @@ int vp9_refining_search_sad(const struct macroblock *x, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); -// Runs sequence of diamond searches in smaller steps for RD +// Runs sequence of diamond searches in smaller steps for RD. int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, @@ -80,6 +80,11 @@ int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv); +// Perform integral projection based motion estimation. +unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi, + MACROBLOCK *x, + BLOCK_SIZE bsize); + typedef int (integer_mv_pattern_search_fn) ( const MACROBLOCK *x, MV *ref_mv, diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 1e88201be..2f9cccb99 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -689,12 +689,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, #endif for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); + x->pred_mv_sad[ref_frame] = INT_MAX; frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; - if (cpi->ref_frame_flags & flag_list[ref_frame]) { - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); + if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) { int_mv *const candidates = mbmi->ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; @@ -1076,11 +1077,11 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, ctx->pred_pixel_ready = 0; for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); int_mv dummy_mv[2]; x->pred_mv_sad[ref_frame] = INT_MAX; - if (cpi->ref_frame_flags & flag_list[ref_frame]) { - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); + if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) { int_mv *const candidates = mbmi->ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 7143987d4..2523d1ea3 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -19,7 +19,8 @@ #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_rd.h" -void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block, +void vp9_quantize_dc(const tran_low_t *coeff_ptr, + int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { @@ -29,6 +30,9 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block, const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int tmp, eob = -1; + vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 16; @@ -41,12 +45,16 @@ void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block, } #if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block, +void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, + int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { int eob = -1; + vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { const int rc = 0; const int coeff = coeff_ptr[rc]; @@ -69,15 +77,20 @@ void vp9_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { + const int n_coeffs = 1024; const int rc = 0; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int tmp, eob = -1; + vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { - tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); + tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), + INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 15; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2; @@ -96,8 +109,12 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr) { + const int n_coeffs = 1024; int eob = -1; + vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { const int rc = 0; const int coeff = coeff_ptr[rc]; @@ -105,8 +122,8 @@ void vp9_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp = - (clamp(abs_coeff + round_ptr[rc != 0], INT32_MIN, INT32_MAX) * - quant) >> 15; + (clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), + INT32_MIN, INT32_MAX) * quant) >> 15; qcoeff_ptr[rc] = (tran_low_t)((tmp ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2; if (tmp) @@ -521,21 +538,21 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_quantize_b(BLOCK_OFFSET(p->coeff, block), - 16, x->skip_block, - p->zbin, p->round, p->quant, p->quant_shift, - BLOCK_OFFSET(p->qcoeff, block), - BLOCK_OFFSET(pd->dqcoeff, block), - pd->dequant, &p->eobs[block], - scan, iscan); + 16, x->skip_block, + p->zbin, p->round, p->quant, p->quant_shift, + BLOCK_OFFSET(p->qcoeff, block), + BLOCK_OFFSET(pd->dqcoeff, block), + pd->dequant, &p->eobs[block], + scan, iscan); return; } #endif vp9_quantize_b(BLOCK_OFFSET(p->coeff, block), - 16, x->skip_block, - p->zbin, p->round, p->quant, p->quant_shift, - BLOCK_OFFSET(p->qcoeff, block), - BLOCK_OFFSET(pd->dqcoeff, block), - pd->dequant, &p->eobs[block], scan, iscan); + 16, x->skip_block, + p->zbin, p->round, p->quant, p->quant_shift, + BLOCK_OFFSET(p->qcoeff, block), + BLOCK_OFFSET(pd->dqcoeff, block), + pd->dequant, &p->eobs[block], scan, iscan); } static void invert_quant(int16_t *quant, int16_t *shift, int d) { diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index de2839f5b..55e546944 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -37,7 +37,8 @@ typedef struct { DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); } QUANTS; -void vp9_quantize_dc(const tran_low_t *coeff_ptr, int skip_block, +void vp9_quantize_dc(const tran_low_t *coeff_ptr, + int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr); @@ -49,7 +50,8 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan); #if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, int skip_block, +void vp9_highbd_quantize_dc(const tran_low_t *coeff_ptr, + int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr); diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 0b6d11eca..5cc980cf7 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -532,13 +532,14 @@ int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, return base + vp9_raster_block_offset(plane_bsize, raster_block, stride); } -const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, - int ref_frame) { +YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, + int ref_frame) { const VP9_COMMON *const cm = &cpi->common; - const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]; const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; - return (scaled_idx != ref_idx) ? - &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL; + const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame); + return + (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ? + &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL; } int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) { diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index 59a87cf98..b18a81659 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -147,8 +147,8 @@ int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block, int16_t *base); -const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi, - int ref_frame); +YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi, + int ref_frame); void vp9_init_me_luts(); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 53a5dba19..51397a791 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -328,12 +328,12 @@ static const int16_t band_counts[TX_SIZES][8] = { { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 }, }; -static INLINE int cost_coeffs(MACROBLOCK *x, - int plane, int block, - ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, - TX_SIZE tx_size, - const int16_t *scan, const int16_t *nb, - int use_fast_coef_costing) { +static int cost_coeffs(MACROBLOCK *x, + int plane, int block, + ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, + TX_SIZE tx_size, + const int16_t *scan, const int16_t *nb, + int use_fast_coef_costing) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; const struct macroblock_plane *p = &x->plane[plane]; @@ -1316,8 +1316,8 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i, for (idy = 0; idy < num_4x4_blocks_high; ++idy) for (idx = 0; idx < num_4x4_blocks_wide; ++idx) - vpx_memcpy(&mic->bmi[i + idy * 2 + idx], - &mic->bmi[i], sizeof(mic->bmi[i])); + vpx_memmove(&mic->bmi[i + idy * 2 + idx], + &mic->bmi[i], sizeof(mic->bmi[i])); return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) + thismvcost; @@ -2022,6 +2022,8 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; + assert(yv12 != NULL); + // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this // use the UV scaling factors. vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); @@ -2912,6 +2914,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if (cpi->ref_frame_flags & flag_list[ref_frame]) { + assert(get_ref_frame_buffer(cpi, ref_frame) != NULL); setup_buffer_inter(cpi, x, tile_info, ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } @@ -4238,4 +4241,3 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_tx_diff, best_filter_diff, 0); } - diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 161b5a24d..bf9cad00a 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -91,8 +91,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { if (oxcf->ss_enable_auto_arf[layer]) lc->alt_ref_idx = alt_ref_idx++; else - lc->alt_ref_idx = -1; - lc->gold_ref_idx = -1; + lc->alt_ref_idx = INVALID_IDX; + lc->gold_ref_idx = INVALID_IDX; } lrc->buffer_level = oxcf->starting_buffer_level_ms * diff --git a/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/vp9/encoder/x86/vp9_avg_intrin_sse2.c index 482fa3da3..f49949940 100644 --- a/vp9/encoder/x86/vp9_avg_intrin_sse2.c +++ b/vp9/encoder/x86/vp9_avg_intrin_sse2.c @@ -90,8 +90,16 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref, s0 = _mm_adds_epu16(s0, t0); s1 = _mm_adds_epu16(s1, t1); - s0 = _mm_srai_epi16(s0, 5); - s1 = _mm_srai_epi16(s1, 5); + if (height == 64) { + s0 = _mm_srai_epi16(s0, 5); + s1 = _mm_srai_epi16(s1, 5); + } else if (height == 32) { + s0 = _mm_srai_epi16(s0, 4); + s1 = _mm_srai_epi16(s1, 4); + } else { + s0 = _mm_srai_epi16(s0, 3); + s1 = _mm_srai_epi16(s1, 3); + } _mm_store_si128((__m128i *)hbuf, s0); hbuf += 8; @@ -104,6 +112,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) { __m128i s0 = _mm_sad_epu8(src_line, zero); __m128i s1; int i; + const int norm_factor = 3 + (width >> 5); for (i = 16; i < width; i += 16) { ref += 16; @@ -115,7 +124,7 @@ int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) { s1 = _mm_srli_si128(s0, 8); s0 = _mm_adds_epu16(s0, s1); - return (_mm_extract_epi16(s0, 0)) >> 5; + return _mm_extract_epi16(s0, 0) >> norm_factor; } int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c index 9bb880c7c..9e4c1a5c4 100644 --- a/vp9/vp9_dx_iface.c +++ b/vp9/vp9_dx_iface.c @@ -278,7 +278,7 @@ static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { VP9_COMMON *const cm = &frame_worker_data->pbi->common; BufferPool *const pool = cm->buffer_pool; - cm->new_fb_idx = -1; + cm->new_fb_idx = INVALID_IDX; cm->byte_alignment = ctx->byte_alignment; if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { @@ -500,7 +500,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, check_resync(ctx, frame_worker_data->pbi); } else { - const VP9WorkerInterface *const winterface = vp9_get_worker_interface(); VP9Worker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id]; FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; // Copy context from last worker thread to next worker thread. |