diff options
-rw-r--r-- | examples.mk | 1 | ||||
-rw-r--r-- | libs.mk | 5 | ||||
-rw-r--r-- | mainpage.dox | 2 | ||||
-rw-r--r-- | tools/tiny_ssim.c | 38 | ||||
-rw-r--r-- | usage_cx.dox | 2 | ||||
-rw-r--r-- | usage_dx.dox | 2 | ||||
-rw-r--r-- | vp8/common/mfqe.c | 2 | ||||
-rw-r--r-- | vp8/common/postproc.c | 2 | ||||
-rw-r--r-- | vp8/decoder/onyxd_if.c | 16 | ||||
-rw-r--r-- | vp8/encoder/onyx_if.c | 8 | ||||
-rw-r--r-- | vp8/vp8_cx_iface.c | 15 | ||||
-rw-r--r-- | vp8/vp8_dx_iface.c | 19 | ||||
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 4 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 266 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 1 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 43 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.h | 7 |
18 files changed, 230 insertions, 208 deletions
diff --git a/examples.mk b/examples.mk index 1187f147a..a1d4eb68c 100644 --- a/examples.mk +++ b/examples.mk @@ -404,3 +404,4 @@ CLEAN-OBJS += examples.doxy samples.dox $(ALL_EXAMPLES:.c=.dox) DOCS-yes += examples.doxy samples.dox examples.doxy: samples.dox $(ALL_EXAMPLES:.c=.dox) @echo "INPUT += $^" > $@ + @echo "ENABLED_SECTIONS += samples" >> $@ @@ -112,11 +112,6 @@ ifeq ($(CONFIG_DECODERS),yes) CODEC_DOC_SECTIONS += decoder endif -# Suppress -Wextra warnings in first party code pending investigation. -# https://bugs.chromium.org/p/webm/issues/detail?id=1246 -$(BUILD_PFX)vp8/encoder/onyx_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered -$(BUILD_PFX)vp8/decoder/onyxd_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered - ifeq ($(CONFIG_MSVS),yes) CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd) GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd) diff --git a/mainpage.dox b/mainpage.dox index ec202fa4f..4b0dff087 100644 --- a/mainpage.dox +++ b/mainpage.dox @@ -25,8 +25,10 @@ release. - The \ref readme contains instructions on recompiling the sample applications. - Read the \ref usage "usage" for a narrative on codec usage. + \if samples - Read the \ref samples "sample code" for examples of how to interact with the codec. + \endif - \ref codec reference \if encoder - \ref encoder reference diff --git a/tools/tiny_ssim.c b/tools/tiny_ssim.c index 1f73c73c1..67a6903b5 100644 --- a/tools/tiny_ssim.c +++ b/tools/tiny_ssim.c @@ -50,7 +50,8 @@ static uint64_t calc_plane_error16(uint16_t *orig, int orig_stride, } return total_sse; } -#endif +#endif // CONFIG_VP9_HIGHBITDEPTH + static uint64_t calc_plane_error(uint8_t *orig, int orig_stride, uint8_t *recon, int recon_stride, unsigned int cols, unsigned int rows) { @@ -269,6 +270,7 @@ static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) { return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8); } +#if CONFIG_VP9_HIGHBITDEPTH static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t bd, uint32_t shift) { uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; @@ -277,6 +279,7 @@ static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r, return similarity(sum_s >> shift, sum_r >> shift, sum_sq_s >> (2 * shift), sum_sq_r >> (2 * shift), sum_sxr >> (2 * shift), 64, bd); } +#endif // CONFIG_VP9_HIGHBITDEPTH // We are using a 8x8 moving window with starting location of each 8x8 window // on the 4x4 pixel grid. Such arrangement allows the windows to overlap @@ -300,6 +303,7 @@ static double ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, return ssim_total; } +#if CONFIG_VP9_HIGHBITDEPTH static double highbd_ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, int stride_img2, int width, int height, uint32_t bd, uint32_t shift) { @@ -321,6 +325,7 @@ static double highbd_ssim2(const uint8_t *img1, const uint8_t *img2, ssim_total /= samples; return ssim_total; } +#endif // CONFIG_VP9_HIGHBITDEPTH // traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity // @@ -565,35 +570,6 @@ double get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2, return inconsistency_total; } -double highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, - const YV12_BUFFER_CONFIG *dest, double *weight, - uint32_t bd, uint32_t in_bd) { - double a, b, c; - double ssimv; - uint32_t shift = 0; - - assert(bd >= in_bd); - shift = bd - in_bd; - - a = highbd_ssim2(source->y_buffer, dest->y_buffer, source->y_stride, - dest->y_stride, source->y_crop_width, source->y_crop_height, - in_bd, shift); - - b = highbd_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride, - dest->uv_stride, source->uv_crop_width, - source->uv_crop_height, in_bd, shift); - - c = highbd_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride, - dest->uv_stride, source->uv_crop_width, - source->uv_crop_height, in_bd, shift); - - ssimv = a * .8 + .1 * (b + c); - - *weight = 1; - - return ssimv; -} - int main(int argc, char *argv[]) { FILE *framestats = NULL; int bit_depth = 8; @@ -711,7 +687,7 @@ int main(int argc, char *argv[]) { #define psnr_and_ssim(ssim, psnr, buf0, buf1, w, h) \ ssim = ssim2(buf0, buf1, w, w, w, h); \ psnr = calc_plane_error(buf0, w, buf1, w, w, h); -#endif +#endif // CONFIG_VP9_HIGHBITDEPTH if (n_frames == allocated_frames) { allocated_frames = allocated_frames == 0 ? 1024 : allocated_frames * 2; diff --git a/usage_cx.dox b/usage_cx.dox index 92b0d34ef..b2220cfdd 100644 --- a/usage_cx.dox +++ b/usage_cx.dox @@ -8,6 +8,8 @@ \ref usage_deadline. + \if samples \ref samples + \endif */ diff --git a/usage_dx.dox b/usage_dx.dox index 883ce2492..85063f705 100644 --- a/usage_dx.dox +++ b/usage_dx.dox @@ -11,7 +11,9 @@ \ref usage_postproc based on the amount of free CPU time. For more information on the <code>deadline</code> parameter, see \ref usage_deadline. + \if samples \ref samples + \endif \section usage_cb Callback Based Decoding diff --git a/vp8/common/mfqe.c b/vp8/common/mfqe.c index aad908572..1fe7363f1 100644 --- a/vp8/common/mfqe.c +++ b/vp8/common/mfqe.c @@ -235,7 +235,7 @@ void vp8_multiframe_quality_enhance(VP8_COMMON *cm) { FRAME_TYPE frame_type = cm->frame_type; /* Point at base of Mb MODE_INFO list has motion vectors etc */ - const MODE_INFO *mode_info_context = cm->show_frame_mi; + const MODE_INFO *mode_info_context = cm->mi; int mb_row; int mb_col; int totmap, map[4]; diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index 8c292d616..1ff2e5cc5 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -325,7 +325,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vpx_clear_system_state(); if ((flags & VP8D_MFQE) && oci->postproc_state.last_frame_valid && - oci->current_video_frame >= 2 && + oci->current_video_frame > 10 && oci->postproc_state.last_base_qindex < 60 && oci->base_qindex - oci->postproc_state.last_base_qindex >= 20) { vp8_multiframe_quality_enhance(oci); diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index bb34821f3..918c68626 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -321,22 +321,6 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size, pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; - if (setjmp(pbi->common.error.jmp)) { - /* We do not know if the missing frame(s) was supposed to update - * any of the reference buffers, but we act conservative and - * mark only the last buffer as corrupted. - */ - cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; - - if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) { - cm->fb_idx_ref_cnt[cm->new_fb_idx]--; - } - pbi->common.error.setjmp = 0; - goto decode_exit; - } - - pbi->common.error.setjmp = 1; - retcode = vp8_decode_frame(pbi); if (retcode < 0) { diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 8de8ca18e..adc25024c 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -4813,14 +4813,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, cm = &cpi->common; - if (setjmp(cpi->common.error.jmp)) { - cpi->common.error.setjmp = 0; - vpx_clear_system_state(); - return VPX_CODEC_CORRUPT_FRAME; - } - - cpi->common.error.setjmp = 1; - vpx_usec_timer_start(&cmptimer); cpi->source = NULL; diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 71eadeb08..d01d2095f 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -16,6 +16,7 @@ #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" #include "vpx_mem/vpx_mem.h" +#include "vpx_ports/system_state.h" #include "vpx_ports/vpx_once.h" #include "vp8/encoder/onyx_int.h" #include "vpx/vp8cx.h" @@ -796,9 +797,11 @@ static vpx_codec_err_t set_reference_and_update(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned long duration, - vpx_enc_frame_flags_t flags, + vpx_enc_frame_flags_t enc_flags, unsigned long deadline) { - vpx_codec_err_t res = VPX_CODEC_OK; + volatile vpx_codec_err_t res = VPX_CODEC_OK; + // Make a copy as volatile to avoid -Wclobbered with longjmp. + volatile vpx_enc_frame_flags_t flags = enc_flags; if (!ctx->cfg.rc_target_bitrate) { #if CONFIG_MULTI_RES_ENCODING @@ -840,6 +843,12 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, } } + if (setjmp(ctx->cpi->common.error.jmp)) { + ctx->cpi->common.error.setjmp = 0; + vpx_clear_system_state(); + return VPX_CODEC_CORRUPT_FRAME; + } + /* Initialize the encoder instance on the first frame*/ if (!res && ctx->cpi) { unsigned int lib_flags; @@ -886,6 +895,8 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, cx_data_end = ctx->cx_data + cx_data_sz; lib_flags = 0; + ctx->cpi->common.error.setjmp = 1; + while (cx_data_sz >= ctx->cx_data_sz / 2) { comp_data_state = vp8_get_compressed_data( ctx->cpi, &lib_flags, &size, cx_data, cx_data_end, &dst_time_stamp, diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index a2008b903..6d1c5f595 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c @@ -335,8 +335,8 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, if (!res) { VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0]; + VP8_COMMON *const pc = &pbi->common; if (resolution_change) { - VP8_COMMON *const pc = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; #if CONFIG_MULTITHREAD int i; @@ -428,6 +428,23 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, pbi->common.fb_idx_ref_cnt[0] = 0; } + if (setjmp(pbi->common.error.jmp)) { + /* We do not know if the missing frame(s) was supposed to update + * any of the reference buffers, but we act conservative and + * mark only the last buffer as corrupted. + */ + pc->yv12_fb[pc->lst_fb_idx].corrupted = 1; + + if (pc->fb_idx_ref_cnt[pc->new_fb_idx] > 0) { + pc->fb_idx_ref_cnt[pc->new_fb_idx]--; + } + pc->error.setjmp = 0; + res = update_error_state(ctx, &pbi->common.error); + return res; + } + + pbi->common.error.setjmp = 1; + /* update the pbi fragment data */ pbi->fragments = ctx->fragments; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 98343f0d2..9c82f2ebe 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1382,7 +1382,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, x->sb_mvrow_part = mi->mv[0].as_mv.row; if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->svc.spatial_layer_id == 0 && - cpi->rc.high_num_blocks_with_motion && !x->zero_temp_sad_source && + cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source && cm->width > 640 && cm->height > 480) { // Disable split below 16x16 block size when scroll motion is detected. // TODO(marpan/jianj): Improve this condition: issue is that search @@ -5734,7 +5734,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { int64_t mc_dep_cost_base = 0; int row, col; - for (row = 0; row < cm->mi_rows; ++row) { + for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) { for (col = 0; col < cm->mi_cols; ++col) { TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; intra_cost_base += this_stats->intra_cost; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 0b1d74a59..b10b91c98 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2110,7 +2110,7 @@ static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, BufferPool *const pool) { - unsigned int i, frame; + unsigned int i; VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP)); VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL; @@ -2361,51 +2361,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, vp9_set_speed_features_framesize_independent(cpi); vp9_set_speed_features_framesize_dependent(cpi); - if (cpi->sf.enable_tpl_model) { - const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); #if CONFIG_NON_GREEDY_MV - CHECK_MEM_ERROR( - cm, cpi->feature_score_loc_arr, - vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr))); - CHECK_MEM_ERROR( - cm, cpi->feature_score_loc_sort, - vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort))); - CHECK_MEM_ERROR( - cm, cpi->feature_score_loc_heap, - vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap))); -#endif - // TODO(jingning): Reduce the actual memory use for tpl model build up. - for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { -#if CONFIG_NON_GREEDY_MV - int sqr_bsize; - int rf_idx; - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { - for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) { - CHECK_MEM_ERROR( - cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize], - vpx_calloc(mi_rows * mi_cols, - sizeof(*cpi->tpl_stats[frame] - .pyramid_mv_arr[rf_idx][sqr_bsize]))); - } - } -#endif - CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr, - vpx_calloc(mi_rows * mi_cols, - sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr))); - cpi->tpl_stats[frame].is_valid = 0; - cpi->tpl_stats[frame].width = mi_cols; - cpi->tpl_stats[frame].height = mi_rows; - cpi->tpl_stats[frame].stride = mi_cols; - cpi->tpl_stats[frame].mi_rows = cm->mi_rows; - cpi->tpl_stats[frame].mi_cols = cm->mi_cols; - } - - for (frame = 0; frame < REF_FRAMES; ++frame) { - cpi->enc_frame_buf[frame].mem_valid = 0; - cpi->enc_frame_buf[frame].released = 1; - } - } + cpi->feature_score_loc_alloc = 0; +#endif // CONFIG_NON_GREEDY_MV + for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL; // Allocate memory to store variances for a frame. CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff))); @@ -3848,12 +3807,15 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) { svc->high_source_sad_superframe = cpi->rc.high_source_sad; + svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion; // On scene change reset temporal layer pattern to TL0. // Note that if the base/lower spatial layers are skipped: instead of // inserting base layer here, we force max-q for the next superframe // with lower spatial layers: this is done in vp9_encodedframe_overshoot() // when max-q is decided for the current layer. - if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0) { + // Only do this reset for bypass/flexible mode. + if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 && + svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { // rc->high_source_sad will get reset so copy it to restore it. int tmp_high_source_sad = cpi->rc.high_source_sad; vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME); @@ -6185,6 +6147,79 @@ static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame, } #endif // USE_PQSORT #endif // CHANGE_MV_SEARCH_ORDER + +static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx, + YV12_BUFFER_CONFIG *ref_frame[3], + BLOCK_SIZE bsize) { + VP9_COMMON *cm = &cpi->common; + ThreadData *td = &cpi->td; + TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + int fs_loc_sort_size; + int fs_loc_heap_size; + int mi_row, mi_col; + + tpl_frame->lambda = 250; + + fs_loc_sort_size = 0; + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { + const int mb_y_offset = + mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + TplDepStats *tpl_stats = + &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + FEATURE_SCORE_LOC *fs_loc = + &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col]; + tpl_stats->feature_score = get_feature_score( + xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh); + fs_loc->visited = 0; + fs_loc->feature_score = tpl_stats->feature_score; + fs_loc->mi_row = mi_row; + fs_loc->mi_col = mi_col; + cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc; + ++fs_loc_sort_size; + } + } + + qsort(cpi->feature_score_loc_sort, fs_loc_sort_size, + sizeof(*cpi->feature_score_loc_sort), compare_feature_score); + +#if CHANGE_MV_SEARCH_ORDER +#if !USE_PQSORT + for (i = 0; i < fs_loc_sort_size; ++i) { + FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i]; + do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, + fs_loc->mi_col); + } +#else // !USE_PQSORT + fs_loc_heap_size = 0; + max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size, + cpi->feature_score_loc_sort[0]); + + while (fs_loc_heap_size > 0) { + FEATURE_SCORE_LOC *fs_loc; + max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc); + + fs_loc->visited = 1; + + do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, + fs_loc->mi_col); + + add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col, + &fs_loc_heap_size); + } +#endif // !USE_PQSORT +#else // CHANGE_MV_SEARCH_ORDER + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { + do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col); + } + } +#endif // CHANGE_MV_SEARCH_ORDER +} #endif // CONFIG_NON_GREEDY_MV void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, @@ -6217,17 +6252,6 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; int64_t recon_error, sse; -#if CONFIG_NON_GREEDY_MV - int rf_idx; - int fs_loc_sort_size; -#if CHANGE_MV_SEARCH_ORDER -#if USE_PQSORT - int fs_loc_heap_size; -#else - int i; -#endif // USE_PQSORT -#endif // CHANGE_MV_SEARCH_ORDER -#endif // CONFIG_NON_GREEDY_MV // Setup scaling factor #if CONFIG_VP9_HIGHBITDEPTH @@ -6268,66 +6292,9 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, vp9_frame_init_quantizer(cpi); #if CONFIG_NON_GREEDY_MV - tpl_frame->lambda = 250; - fs_loc_sort_size = 0; - - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { - const int mb_y_offset = - mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; - const int bw = 4 << b_width_log2_lookup[bsize]; - const int bh = 4 << b_height_log2_lookup[bsize]; - TplDepStats *tpl_stats = - &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; - FEATURE_SCORE_LOC *fs_loc = - &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col]; - tpl_stats->feature_score = get_feature_score( - xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh); - fs_loc->visited = 0; - fs_loc->feature_score = tpl_stats->feature_score; - fs_loc->mi_row = mi_row; - fs_loc->mi_col = mi_col; - cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc; - ++fs_loc_sort_size; - } - } - - qsort(cpi->feature_score_loc_sort, fs_loc_sort_size, - sizeof(*cpi->feature_score_loc_sort), compare_feature_score); - -#if CHANGE_MV_SEARCH_ORDER -#if !USE_PQSORT - for (i = 0; i < fs_loc_sort_size; ++i) { - FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i]; - do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, - fs_loc->mi_col); - } -#else // !USE_PQSORT - fs_loc_heap_size = 0; - max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size, - cpi->feature_score_loc_sort[0]); - - while (fs_loc_heap_size > 0) { - FEATURE_SCORE_LOC *fs_loc; - max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc); - - fs_loc->visited = 1; - - do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, - fs_loc->mi_col); + build_motion_field(cpi, xd, frame_idx, ref_frame, bsize); +#endif - add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col, - &fs_loc_heap_size); - } -#endif // !USE_PQSORT -#else // CHANGE_MV_SEARCH_ORDER - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { - do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col); - } - } -#endif // CHANGE_MV_SEARCH_ORDER -#endif // CONFIG_NON_GREEDY_MV for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame, @@ -6341,6 +6308,7 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, bsize); #if CONFIG_NON_GREEDY_MV { + int rf_idx; TplDepStats *this_tpl_stats = &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; for (rf_idx = 0; rf_idx < 3; ++rf_idx) { @@ -6436,6 +6404,71 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames, #endif // DUMP_TPL_STATS #endif // CONFIG_NON_GREEDY_MV +static void init_tpl_buffer(VP9_COMP *cpi) { + VP9_COMMON *cm = &cpi->common; + int frame; + + const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); +#if CONFIG_NON_GREEDY_MV + int sqr_bsize; + int rf_idx; + + // TODO(angiebird): This probably needs further modifications to support + // frame scaling later on. + if (cpi->feature_score_loc_alloc == 0) { + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_arr, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr))); + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_sort, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort))); + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_heap, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap))); + + cpi->feature_score_loc_alloc = 1; + } +#endif + + // TODO(jingning): Reduce the actual memory use for tpl model build up. + for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { + if (cpi->tpl_stats[frame].width >= mi_cols && + cpi->tpl_stats[frame].height >= mi_rows && + cpi->tpl_stats[frame].tpl_stats_ptr) + continue; + +#if CONFIG_NON_GREEDY_MV + vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr); + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) { + CHECK_MEM_ERROR( + cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize], + vpx_calloc( + mi_rows * mi_cols, + sizeof( + *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]))); + } + } +#endif + vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); + CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr, + vpx_calloc(mi_rows * mi_cols, + sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr))); + cpi->tpl_stats[frame].is_valid = 0; + cpi->tpl_stats[frame].width = mi_cols; + cpi->tpl_stats[frame].height = mi_rows; + cpi->tpl_stats[frame].stride = mi_cols; + cpi->tpl_stats[frame].mi_rows = cm->mi_rows; + cpi->tpl_stats[frame].mi_cols = cm->mi_cols; + } + + for (frame = 0; frame < REF_FRAMES; ++frame) { + cpi->enc_frame_buf[frame].mem_valid = 0; + cpi->enc_frame_buf[frame].released = 1; + } +} + static void setup_tpl_stats(VP9_COMP *cpi) { GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE]; const GF_GROUP *gf_group = &cpi->twopass.gf_group; @@ -6673,6 +6706,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, if (gf_group_index == 1 && cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE && cpi->sf.enable_tpl_model) { + init_tpl_buffer(cpi); vp9_estimate_qp_gop(cpi); setup_tpl_stats(cpi); } diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index c7c45e3a4..e4175f6ed 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -570,6 +570,7 @@ typedef struct VP9_COMP { YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES]; EncFrameBuf enc_frame_buf[REF_FRAMES]; #if CONFIG_NON_GREEDY_MV + int feature_score_loc_alloc; FEATURE_SCORE_LOC *feature_score_loc_arr; FEATURE_SCORE_LOC **feature_score_loc_sort; FEATURE_SCORE_LOC **feature_score_loc_heap; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index e4a5f3e18..602147421 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -424,6 +424,7 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, static void set_rt_speed_feature_framesize_independent( VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, vp9e_tune_content content) { VP9_COMMON *const cm = &cpi->common; + SVC *const svc = &cpi->svc; const int is_keyframe = cm->frame_type == KEY_FRAME; const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key; sf->static_segmentation = 0; @@ -479,7 +480,7 @@ static void set_rt_speed_feature_framesize_independent( // Reference masking only enabled for 1 spatial layer, and if none of the // references have been scaled. The latter condition needs to be checked // for external or internal dynamic resize. - sf->reference_masking = (cpi->svc.number_spatial_layers == 1); + sf->reference_masking = (svc->number_spatial_layers == 1); if (sf->reference_masking == 1 && (cpi->external_resize == 1 || cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) { @@ -615,7 +616,7 @@ static void set_rt_speed_feature_framesize_independent( } // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent // increase in encoding time. - if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1; + if (cpi->use_svc && svc->spatial_layer_id > 0) sf->nonrd_keyframe = 1; if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && cpi->oxcf.rc_mode == VPX_CBR) sf->overshoot_detection_cbr_rt = FAST_DETECTION_MAXQ; @@ -653,7 +654,7 @@ static void set_rt_speed_feature_framesize_independent( (cm->width * cm->height <= 640 * 360) ? 40000 : 60000; if (cpi->content_state_sb_fd == NULL && (!cpi->use_svc || - cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { + svc->spatial_layer_id == svc->number_spatial_layers - 1)) { cpi->content_state_sb_fd = (uint8_t *)vpx_calloc( (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t)); } @@ -662,7 +663,7 @@ static void set_rt_speed_feature_framesize_independent( // Enable short circuit for low temporal variance. sf->short_circuit_low_temp_var = 1; } - if (cpi->svc.temporal_layer_id > 0) { + if (svc->temporal_layer_id > 0) { sf->adaptive_rd_thresh = 4; sf->limit_newmv_early_exit = 0; sf->base_mv_aggressive = 1; @@ -676,16 +677,15 @@ static void set_rt_speed_feature_framesize_independent( sf->mv.fullpel_search_step_param = 10; // For SVC: use better mv search on base temporal layer, and only // on base spatial layer if highest resolution is above 640x360. - if (cpi->svc.number_temporal_layers > 2 && - cpi->svc.temporal_layer_id == 0 && - (cpi->svc.spatial_layer_id == 0 || + if (svc->number_temporal_layers > 2 && svc->temporal_layer_id == 0 && + (svc->spatial_layer_id == 0 || cpi->oxcf.width * cpi->oxcf.height <= 640 * 360)) { sf->mv.search_method = NSTEP; sf->mv.fullpel_search_step_param = 6; } - if (cpi->svc.temporal_layer_id > 0 || cpi->svc.spatial_layer_id > 1) { + if (svc->temporal_layer_id > 0 || svc->spatial_layer_id > 1) { sf->use_simple_block_yrd = 1; - if (cpi->svc.non_reference_frame) + if (svc->non_reference_frame) sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_EVENMORE; } if (cpi->use_svc && cpi->row_mt && cpi->oxcf.max_threads > 1) @@ -696,28 +696,28 @@ static void set_rt_speed_feature_framesize_independent( if (!cpi->last_frame_dropped && cpi->resize_state == ORIG && !cpi->external_resize && (!cpi->use_svc || - (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && - !cpi->svc.last_layer_dropped[cpi->svc.number_spatial_layers - 1]))) { + (svc->spatial_layer_id == svc->number_spatial_layers - 1 && + !svc->last_layer_dropped[svc->number_spatial_layers - 1]))) { sf->copy_partition_flag = 1; cpi->max_copied_frame = 2; // The top temporal enhancement layer (for number of temporal layers > 1) // are non-reference frames, so use large/max value for max_copied_frame. - if (cpi->svc.number_temporal_layers > 1 && - cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1) + if (svc->number_temporal_layers > 1 && + svc->temporal_layer_id == svc->number_temporal_layers - 1) cpi->max_copied_frame = 255; } // For SVC: enable use of lower resolution partition for higher resolution, // only for 3 spatial layers and when config/top resolution is above VGA. // Enable only for non-base temporal layer frames. - if (cpi->use_svc && cpi->svc.use_partition_reuse && - cpi->svc.number_spatial_layers == 3 && cpi->svc.temporal_layer_id > 0 && + if (cpi->use_svc && svc->use_partition_reuse && + svc->number_spatial_layers == 3 && svc->temporal_layer_id > 0 && cpi->oxcf.width * cpi->oxcf.height > 640 * 480) sf->svc_use_lowres_part = 1; // For SVC when golden is used as second temporal reference: to avoid // encode time increase only use this feature on base temporal layer. // (i.e remove golden flag from frame_flags for temporal_layer_id > 0). - if (cpi->use_svc && cpi->svc.use_gf_temporal_ref_current_layer && - cpi->svc.temporal_layer_id > 0) + if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer && + svc->temporal_layer_id > 0) cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); } @@ -789,17 +789,18 @@ static void set_rt_speed_feature_framesize_independent( (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(*cpi->count_lastgolden_frame_usage)); } - if (cpi->svc.previous_frame_is_intra_only) { + if (svc->previous_frame_is_intra_only) { sf->partition_search_type = FIXED_PARTITION; sf->always_this_block_size = BLOCK_64X64; } // Special case for screen content: increase motion search on base spatial // layer when high motion is detected or previous SL0 frame was dropped. if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed >= 5 && - cpi->svc.spatial_layer_id == 0 && - (cpi->rc.high_num_blocks_with_motion || cpi->svc.last_layer_dropped[0])) { + (svc->high_num_blocks_with_motion || svc->last_layer_dropped[0])) { sf->mv.search_method = NSTEP; - sf->mv.fullpel_search_step_param = 2; + // TODO(marpan/jianj): Tune this setting for screensharing. For now use + // larger step_param for non-base layer, to avoid increase in encode time. + sf->mv.fullpel_search_step_param = (svc->spatial_layer_id == 0) ? 2 : 4; } } diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 510087580..df5af6dc9 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -884,7 +884,10 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { svc->non_reference_frame = 0; } - if (svc->spatial_layer_id == 0) svc->high_source_sad_superframe = 0; + if (svc->spatial_layer_id == 0) { + svc->high_source_sad_superframe = 0; + svc->high_num_blocks_with_motion = 0; + } if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->last_layer_dropped[svc->spatial_layer_id] && diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index f1f2457b2..c25644617 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -143,10 +143,11 @@ typedef struct SVC { INTER_LAYER_PRED disable_inter_layer_pred; - // Flag to indicate scene change at current superframe, scene detection is - // currently checked for each superframe prior to encoding, on the full - // resolution source. + // Flag to indicate scene change and high num of motion blocks at current + // superframe, scene detection is currently checked for each superframe prior + // to encoding, on the full resolution source. int high_source_sad_superframe; + int high_num_blocks_with_motion; // Flags used to get SVC pattern info. int update_buffer_slot[VPX_SS_MAX_LAYERS]; |