diff options
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_bitstream.c | 59 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 51 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 22 | ||||
-rw-r--r-- | vp9/encoder/vp9_ethread.c | 110 | ||||
-rw-r--r-- | vp9/encoder/vp9_ethread.h | 5 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 194 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 13 | ||||
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 55 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_diamond_search_sad_avx.c | 7 |
9 files changed, 273 insertions, 243 deletions
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index e23ca9773..7644930c1 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -236,11 +236,11 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *const xd, } } -static void pack_inter_mode_mvs( - VP9_COMP *cpi, const MACROBLOCKD *const xd, - const MB_MODE_INFO_EXT *const mbmi_ext, vpx_writer *w, - unsigned int *const max_mv_magnitude, - int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) { +static void pack_inter_mode_mvs(VP9_COMP *cpi, const MACROBLOCKD *const xd, + const MB_MODE_INFO_EXT *const mbmi_ext, + vpx_writer *w, + unsigned int *const max_mv_magnitude, + int interp_filter_selected[][SWITCHABLE]) { VP9_COMMON *const cm = &cpi->common; const nmv_context *nmvc = &cm->fc->nmvc; const struct segmentation *const seg = &cm->seg; @@ -373,11 +373,12 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd, write_intra_mode(w, mi->uv_mode, vp9_kf_uv_mode_prob[mi->mode]); } -static void write_modes_b( - VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile, - vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, - int mi_row, int mi_col, unsigned int *const max_mv_magnitude, - int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) { +static void write_modes_b(VP9_COMP *cpi, MACROBLOCKD *const xd, + const TileInfo *const tile, vpx_writer *w, + TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, + int mi_row, int mi_col, + unsigned int *const max_mv_magnitude, + int interp_filter_selected[][SWITCHABLE]) { const VP9_COMMON *const cm = &cpi->common; const MB_MODE_INFO_EXT *const mbmi_ext = cpi->td.mb.mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); @@ -422,12 +423,12 @@ static void write_partition(const VP9_COMMON *const cm, } } -static void write_modes_sb( - VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile, - vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, - int mi_row, int mi_col, BLOCK_SIZE bsize, - unsigned int *const max_mv_magnitude, - int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) { +static void write_modes_sb(VP9_COMP *cpi, MACROBLOCKD *const xd, + const TileInfo *const tile, vpx_writer *w, + TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, + int mi_row, int mi_col, BLOCK_SIZE bsize, + unsigned int *const max_mv_magnitude, + int interp_filter_selected[][SWITCHABLE]) { const VP9_COMMON *const cm = &cpi->common; const int bsl = b_width_log2_lookup[bsize]; const int bs = (1 << bsl) / 4; @@ -485,11 +486,10 @@ static void write_modes_sb( update_partition_context(xd, mi_row, mi_col, subsize, bsize); } -static void write_modes( - VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile, - vpx_writer *w, int tile_row, int tile_col, - unsigned int *const max_mv_magnitude, - int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) { +static void write_modes(VP9_COMP *cpi, MACROBLOCKD *const xd, + const TileInfo *const tile, vpx_writer *w, int tile_row, + int tile_col, unsigned int *const max_mv_magnitude, + int interp_filter_selected[][SWITCHABLE]) { const VP9_COMMON *const cm = &cpi->common; int mi_row, mi_col, tile_sb_row; TOKENEXTRA *tok = NULL; @@ -1241,12 +1241,21 @@ static void write_uncompressed_header(VP9_COMP *cpi, vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES); write_frame_size(cm, wb); } else { + static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, + VP9_ALT_FLAG }; + const MV_REFERENCE_FRAME first_ref = get_first_ref_frame(cpi); + const int first_ref_map_idx = get_ref_frame_map_idx(cpi, first_ref); MV_REFERENCE_FRAME ref_frame; vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES); - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX); - vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame), - REF_FRAMES_LOG2); + + // If a reference frame is not referenced, then set the index for that + // reference to the first one used/referenced. + for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) { + const int referenced = cpi->ref_frame_flags & flag_list[ref_frame]; + const int map_idx = referenced ? get_ref_frame_map_idx(cpi, ref_frame) + : first_ref_map_idx; + assert(map_idx != INVALID_IDX); + vpx_wb_write_literal(wb, map_idx, REF_FRAMES_LOG2); vpx_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]); } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 7e80835f6..8d5ec5a36 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -25,6 +25,7 @@ #endif #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" +#include "vpx_ports/vpx_once.h" #include "vpx_ports/vpx_timer.h" #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG #include "vpx_util/vpx_debug_util.h" @@ -929,24 +930,21 @@ static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; } -void vp9_initialize_enc(void) { - static volatile int init_done = 0; - - if (!init_done) { - vp9_rtcd(); - vpx_dsp_rtcd(); - vpx_scale_rtcd(); - vp9_init_intra_predictors(); - vp9_init_me_luts(); - vp9_rc_init_minq_luts(); - vp9_entropy_mv_init(); +static void initialize_enc(void) { + vp9_rtcd(); + vpx_dsp_rtcd(); + vpx_scale_rtcd(); + vp9_init_intra_predictors(); + vp9_init_me_luts(); + vp9_rc_init_minq_luts(); + vp9_entropy_mv_init(); #if !CONFIG_REALTIME_ONLY - vp9_temporal_filter_init(); + vp9_temporal_filter_init(); #endif - init_done = 1; - } } +void vp9_initialize_enc(void) { once(initialize_enc); } + static void dealloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; int i; @@ -2676,7 +2674,6 @@ static void free_tpl_buffer(VP9_COMP *cpi); void vp9_remove_compressor(VP9_COMP *cpi) { VP9_COMMON *cm; unsigned int i; - int t; if (!cpi) return; @@ -2789,28 +2786,10 @@ void vp9_remove_compressor(VP9_COMP *cpi) { free_tpl_buffer(cpi); - for (t = 0; t < cpi->num_workers; ++t) { - VPxWorker *const worker = &cpi->workers[t]; - EncWorkerData *const thread_data = &cpi->tile_thr_data[t]; - - // Deallocate allocated threads. - vpx_get_worker_interface()->end(worker); - - // Deallocate allocated thread data. - if (t < cpi->num_workers - 1) { - vpx_free(thread_data->td->counts); - vp9_free_pc_tree(thread_data->td); - vpx_free(thread_data->td); - } - } - vpx_free(cpi->tile_thr_data); - vpx_free(cpi->workers); + vp9_loop_filter_dealloc(&cpi->lf_row_sync); + vp9_bitstream_encode_tiles_buffer_dealloc(cpi); vp9_row_mt_mem_dealloc(cpi); - - if (cpi->num_workers > 1) { - vp9_loop_filter_dealloc(&cpi->lf_row_sync); - vp9_bitstream_encode_tiles_buffer_dealloc(cpi); - } + vp9_encode_free_mt_data(cpi); #if !CONFIG_REALTIME_ONLY vp9_alt_ref_aq_destroy(cpi->alt_ref_aq); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 9774a64cc..1bca7ded7 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -1196,14 +1196,24 @@ static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) { (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); } +static INLINE MV_REFERENCE_FRAME get_first_ref_frame(VP9_COMP *const cpi) { + static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, + VP9_ALT_FLAG }; + MV_REFERENCE_FRAME ref_frame = LAST_FRAME; + while (ref_frame < MAX_REF_FRAMES) { + if (cpi->ref_frame_flags & flag_list[ref_frame]) break; + ref_frame++; + } + return ref_frame; +} + static INLINE int get_ref_frame_map_idx(const VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { - if (ref_frame == LAST_FRAME) { - return cpi->lst_fb_idx; - } else if (ref_frame == GOLDEN_FRAME) { - return cpi->gld_fb_idx; - } else { - return cpi->alt_fb_idx; + switch (ref_frame) { + case LAST_FRAME: return cpi->lst_fb_idx; + case GOLDEN_FRAME: return cpi->gld_fb_idx; + case ALTREF_FRAME: return cpi->alt_fb_idx; + default: return INVALID_IDX; } } diff --git a/vp9/encoder/vp9_ethread.c b/vp9/encoder/vp9_ethread.c index e7f8a537d..453fe2e0d 100644 --- a/vp9/encoder/vp9_ethread.c +++ b/vp9/encoder/vp9_ethread.c @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "vp9/common/vp9_thread_common.h" +#include "vp9/encoder/vp9_bitstream.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_ethread.h" @@ -79,60 +81,59 @@ static void create_enc_workers(VP9_COMP *cpi, int num_workers) { VP9_COMMON *const cm = &cpi->common; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); int i; + // While using SVC, we need to allocate threads according to the highest + // resolution. When row based multithreading is enabled, it is OK to + // allocate more threads than the number of max tile columns. + if (cpi->use_svc && !cpi->row_mt) { + int max_tile_cols = get_max_tile_cols(cpi); + num_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols); + } + assert(num_workers > 0); + if (num_workers == cpi->num_workers) return; + vp9_loop_filter_dealloc(&cpi->lf_row_sync); + vp9_bitstream_encode_tiles_buffer_dealloc(cpi); + vp9_encode_free_mt_data(cpi); - // Only run once to create threads and allocate thread data. - if (cpi->num_workers == 0) { - int allocated_workers = num_workers; - - // While using SVC, we need to allocate threads according to the highest - // resolution. When row based multithreading is enabled, it is OK to - // allocate more threads than the number of max tile columns. - if (cpi->use_svc && !cpi->row_mt) { - int max_tile_cols = get_max_tile_cols(cpi); - allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols); - } - - CHECK_MEM_ERROR(cm, cpi->workers, - vpx_malloc(allocated_workers * sizeof(*cpi->workers))); + CHECK_MEM_ERROR(cm, cpi->workers, + vpx_malloc(num_workers * sizeof(*cpi->workers))); - CHECK_MEM_ERROR(cm, cpi->tile_thr_data, - vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data))); + CHECK_MEM_ERROR(cm, cpi->tile_thr_data, + vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data))); - for (i = 0; i < allocated_workers; i++) { - VPxWorker *const worker = &cpi->workers[i]; - EncWorkerData *thread_data = &cpi->tile_thr_data[i]; + for (i = 0; i < num_workers; i++) { + VPxWorker *const worker = &cpi->workers[i]; + EncWorkerData *thread_data = &cpi->tile_thr_data[i]; - ++cpi->num_workers; - winterface->init(worker); + ++cpi->num_workers; + winterface->init(worker); - if (i < allocated_workers - 1) { - thread_data->cpi = cpi; + if (i < num_workers - 1) { + thread_data->cpi = cpi; - // Allocate thread data. - CHECK_MEM_ERROR(cm, thread_data->td, - vpx_memalign(32, sizeof(*thread_data->td))); - vp9_zero(*thread_data->td); + // Allocate thread data. + CHECK_MEM_ERROR(cm, thread_data->td, + vpx_memalign(32, sizeof(*thread_data->td))); + vp9_zero(*thread_data->td); - // Set up pc_tree. - thread_data->td->leaf_tree = NULL; - thread_data->td->pc_tree = NULL; - vp9_setup_pc_tree(cm, thread_data->td); + // Set up pc_tree. + thread_data->td->leaf_tree = NULL; + thread_data->td->pc_tree = NULL; + vp9_setup_pc_tree(cm, thread_data->td); - // Allocate frame counters in thread data. - CHECK_MEM_ERROR(cm, thread_data->td->counts, - vpx_calloc(1, sizeof(*thread_data->td->counts))); + // Allocate frame counters in thread data. + CHECK_MEM_ERROR(cm, thread_data->td->counts, + vpx_calloc(1, sizeof(*thread_data->td->counts))); - // Create threads - if (!winterface->reset(worker)) - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Tile encoder thread creation failed"); - } else { - // Main thread acts as a worker and uses the thread data in cpi. - thread_data->cpi = cpi; - thread_data->td = &cpi->td; - } - winterface->sync(worker); + // Create threads + if (!winterface->reset(worker)) + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Tile encoder thread creation failed"); + } else { + // Main thread acts as a worker and uses the thread data in cpi. + thread_data->cpi = cpi; + thread_data->td = &cpi->td; } + winterface->sync(worker); } } @@ -169,6 +170,27 @@ static void launch_enc_workers(VP9_COMP *cpi, VPxWorkerHook hook, void *data2, } } +void vp9_encode_free_mt_data(struct VP9_COMP *cpi) { + int t; + for (t = 0; t < cpi->num_workers; ++t) { + VPxWorker *const worker = &cpi->workers[t]; + EncWorkerData *const thread_data = &cpi->tile_thr_data[t]; + + // Deallocate allocated threads. + vpx_get_worker_interface()->end(worker); + + // Deallocate allocated thread data. + if (t < cpi->num_workers - 1) { + vpx_free(thread_data->td->counts); + vp9_free_pc_tree(thread_data->td); + vpx_free(thread_data->td); + } + } + vpx_free(cpi->tile_thr_data); + vpx_free(cpi->workers); + cpi->num_workers = 0; +} + void vp9_encode_tiles_mt(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; diff --git a/vp9/encoder/vp9_ethread.h b/vp9/encoder/vp9_ethread.h index cda0293bc..4c192da51 100644 --- a/vp9/encoder/vp9_ethread.h +++ b/vp9/encoder/vp9_ethread.h @@ -42,6 +42,11 @@ typedef struct VP9RowMTSyncData { int rows; } VP9RowMTSync; +// Frees EncWorkerData related allocations made by vp9_encode_*_mt(). +// row_mt specific data is freed with vp9_row_mt_mem_dealloc() and is not +// called by this function. +void vp9_encode_free_mt_data(struct VP9_COMP *cpi); + void vp9_encode_tiles_mt(struct VP9_COMP *cpi); void vp9_encode_tiles_row_mt(struct VP9_COMP *cpi); diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index ac29f36ec..cd6706420 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -159,59 +159,63 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { #if CONFIG_VP9_HIGHBITDEPTH /* checks if (r, c) has better score than previous best */ -#define CHECK_BETTER(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - int64_t tmpmse; \ - const MV mv = { r, c }; \ - const MV ref_mv = { rr, rc }; \ - if (second_pred == NULL) { \ - thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ - src_stride, &sse); \ - } else { \ - thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ - src_stride, &sse, second_pred); \ - } \ - tmpmse = thismse; \ - tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ - if (tmpmse >= INT_MAX) { \ - v = INT_MAX; \ - } else if ((v = (uint32_t)tmpmse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } +#define CHECK_BETTER(v, r, c) \ + do { \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + int64_t tmpmse; \ + const MV mv = { r, c }; \ + const MV ref_mv = { rr, rc }; \ + if (second_pred == NULL) { \ + thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ + src_stride, &sse); \ + } else { \ + thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ + src_stride, &sse, second_pred); \ + } \ + tmpmse = thismse; \ + tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ + if (tmpmse >= INT_MAX) { \ + v = INT_MAX; \ + } else if ((v = (uint32_t)tmpmse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } \ + } while (0) #else /* checks if (r, c) has better score than previous best */ -#define CHECK_BETTER(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - const MV mv = { r, c }; \ - const MV ref_mv = { rr, rc }; \ - if (second_pred == NULL) \ - thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ - src_stride, &sse); \ - else \ - thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ - src_stride, &sse, second_pred); \ - if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ - thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } +#define CHECK_BETTER(v, r, c) \ + do { \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + const MV mv = { r, c }; \ + const MV ref_mv = { rr, rc }; \ + if (second_pred == NULL) \ + thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ + src_stride, &sse); \ + else \ + thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ + src_stride, &sse, second_pred); \ + if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } \ + } while (0) #endif #define FIRST_LEVEL_CHECKS \ - { \ + do { \ unsigned int left, right, up, down, diag; \ CHECK_BETTER(left, tr, tc - hstep); \ CHECK_BETTER(right, tr, tc + hstep); \ @@ -224,10 +228,10 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \ case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \ } \ - } + } while (0) #define SECOND_LEVEL_CHECKS \ - { \ + do { \ int kr, kc; \ unsigned int second; \ if (tr != br && tc != bc) { \ @@ -256,7 +260,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \ } \ } \ - } + } while (0) #define SETUP_SUBPEL_SEARCH \ const uint8_t *const z = x->plane[0].src.buf; \ @@ -290,7 +294,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { maxr = subpel_mv_limits.row_max; \ \ bestmv->row *= 8; \ - bestmv->col *= 8; + bestmv->col *= 8 static unsigned int setup_center_error( const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, @@ -678,48 +682,52 @@ static int accurate_sub_pel_search( // TODO(yunqing): this part can be further refactored. #if CONFIG_VP9_HIGHBITDEPTH /* checks if (r, c) has better score than previous best */ -#define CHECK_BETTER1(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - int64_t tmpmse; \ - const MV mv = { r, c }; \ - const MV ref_mv = { rr, rc }; \ - thismse = \ - accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ - y, y_stride, second_pred, w, h, &sse); \ - tmpmse = thismse; \ - tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ - if (tmpmse >= INT_MAX) { \ - v = INT_MAX; \ - } else if ((v = (uint32_t)tmpmse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } +#define CHECK_BETTER1(v, r, c) \ + do { \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + int64_t tmpmse; \ + const MV mv = { r, c }; \ + const MV ref_mv = { rr, rc }; \ + thismse = accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, \ + src_stride, y, y_stride, second_pred, \ + w, h, &sse); \ + tmpmse = thismse; \ + tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ + if (tmpmse >= INT_MAX) { \ + v = INT_MAX; \ + } else if ((v = (uint32_t)tmpmse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } \ + } while (0) #else /* checks if (r, c) has better score than previous best */ -#define CHECK_BETTER1(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - const MV mv = { r, c }; \ - const MV ref_mv = { rr, rc }; \ - thismse = \ - accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ - y, y_stride, second_pred, w, h, &sse); \ - if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ - thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } +#define CHECK_BETTER1(v, r, c) \ + do { \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + const MV mv = { r, c }; \ + const MV ref_mv = { rr, rc }; \ + thismse = accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, \ + src_stride, y, y_stride, second_pred, \ + w, h, &sse); \ + if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } \ + } while (0) #endif @@ -2962,7 +2970,7 @@ int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x, (void)sse; \ (void)thismse; \ (void)cost_list; \ - (void)use_accurate_subpel_search; + (void)use_accurate_subpel_search // Return the maximum MV. uint32_t vp9_return_max_sub_pixel_mv( diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index e38464c72..ac346115f 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -223,9 +223,10 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth; if (oxcf->rc_max_inter_bitrate_pct) { - const int max_rate = - rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100; - target = VPXMIN(target, max_rate); + const int64_t max_rate = + (int64_t)rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100; + // target is of type int and VPXMIN cannot evaluate to larger than target + target = (int)VPXMIN(target, max_rate); } return target; } @@ -234,9 +235,9 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { const RATE_CONTROL *rc = &cpi->rc; const VP9EncoderConfig *oxcf = &cpi->oxcf; if (oxcf->rc_max_intra_bitrate_pct) { - const int max_rate = - rc->avg_frame_bandwidth * oxcf->rc_max_intra_bitrate_pct / 100; - target = VPXMIN(target, max_rate); + const int64_t max_rate = + (int64_t)rc->avg_frame_bandwidth * oxcf->rc_max_intra_bitrate_pct / 100; + target = (int)VPXMIN(target, max_rate); } if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth; return target; diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index ad3a8f7af..f01cb17a2 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -73,7 +73,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->downsample_filter_type[sl] = BILINEAR; svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter. svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark; - svc->fb_idx_upd_tl0[sl] = -1; + svc->fb_idx_upd_tl0[sl] = INVALID_IDX; svc->drop_count[sl] = 0; svc->spatial_layer_sync[sl] = 0; svc->force_drop_constrained_from_above[sl] = 0; @@ -462,32 +462,21 @@ static void reset_fb_idx_unused(VP9_COMP *const cpi) { // fb_idx for that reference to the first one used/referenced. // This is to avoid setting fb_idx for a reference to a slot that is not // used/needed (i.e., since that reference is not referenced or refreshed). - static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, - VP9_ALT_FLAG }; - MV_REFERENCE_FRAME ref_frame; - MV_REFERENCE_FRAME first_ref = 0; - int first_fb_idx = 0; - int fb_idx[3] = { cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx }; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { - if (cpi->ref_frame_flags & flag_list[ref_frame]) { - first_ref = ref_frame; - first_fb_idx = fb_idx[ref_frame - 1]; - break; + const MV_REFERENCE_FRAME first_ref = get_first_ref_frame(cpi); + const int map_idx = get_ref_frame_map_idx(cpi, first_ref); + if (map_idx != INVALID_IDX) { + if (!(cpi->ref_frame_flags & VP9_LAST_FLAG || + cpi->ext_refresh_last_frame)) { + cpi->lst_fb_idx = map_idx; + } + if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG || + cpi->ext_refresh_golden_frame)) { + cpi->gld_fb_idx = map_idx; + } + if (!(cpi->ref_frame_flags & VP9_ALT_FLAG || + cpi->ext_refresh_alt_ref_frame)) { + cpi->alt_fb_idx = map_idx; } - } - if (first_ref > 0) { - if (first_ref != LAST_FRAME && - !(cpi->ref_frame_flags & flag_list[LAST_FRAME]) && - !cpi->ext_refresh_last_frame) - cpi->lst_fb_idx = first_fb_idx; - else if (first_ref != GOLDEN_FRAME && - !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && - !cpi->ext_refresh_golden_frame) - cpi->gld_fb_idx = first_fb_idx; - else if (first_ref != ALTREF_FRAME && - !(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]) && - !cpi->ext_refresh_alt_ref_frame) - cpi->alt_fb_idx = first_fb_idx; } } @@ -716,9 +705,9 @@ static void set_flags_and_fb_idx_bypass_via_set_ref_frame_config( int sl = svc->spatial_layer_id = svc->spatial_layer_to_encode; cpi->svc.temporal_layer_id = cpi->svc.temporal_layer_id_per_spatial[sl]; cpi->ext_refresh_frame_flags_pending = 1; - cpi->lst_fb_idx = svc->lst_fb_idx[sl]; - cpi->gld_fb_idx = svc->gld_fb_idx[sl]; - cpi->alt_fb_idx = svc->alt_fb_idx[sl]; + if (svc->reference_last[sl]) cpi->lst_fb_idx = svc->lst_fb_idx[sl]; + if (svc->reference_golden[sl]) cpi->gld_fb_idx = svc->gld_fb_idx[sl]; + if (svc->reference_altref[sl]) cpi->alt_fb_idx = svc->alt_fb_idx[sl]; cpi->ext_refresh_last_frame = 0; cpi->ext_refresh_golden_frame = 0; cpi->ext_refresh_alt_ref_frame = 0; @@ -875,9 +864,9 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { // flags are passed via the encode call (bypass mode). Issue is that we're // resetting ext_refresh_frame_flags_pending to 0 on frame drops. if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { - memset(&svc->lst_fb_idx, -1, sizeof(svc->lst_fb_idx)); - memset(&svc->gld_fb_idx, -1, sizeof(svc->lst_fb_idx)); - memset(&svc->alt_fb_idx, -1, sizeof(svc->lst_fb_idx)); + memset(&svc->lst_fb_idx, INVALID_IDX, sizeof(svc->lst_fb_idx)); + memset(&svc->gld_fb_idx, INVALID_IDX, sizeof(svc->lst_fb_idx)); + memset(&svc->alt_fb_idx, INVALID_IDX, sizeof(svc->lst_fb_idx)); // These are set by API before the superframe is encoded and they are // passed to encoder layer by layer. Don't reset them on layer 0 in bypass // mode. @@ -970,7 +959,7 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->last_layer_dropped[svc->spatial_layer_id] && - svc->fb_idx_upd_tl0[svc->spatial_layer_id] != -1 && + svc->fb_idx_upd_tl0[svc->spatial_layer_id] != INVALID_IDX && !svc->layer_context[svc->temporal_layer_id].is_key_frame) { // For fixed/non-flexible mode, if the previous frame (same spatial layer // from previous superframe) was dropped, make sure the lst_fb_idx diff --git a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c index 4be6a5ea0..fcf50eb2a 100644 --- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c +++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c @@ -282,7 +282,14 @@ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, // Update the global minimum if the local minimum is smaller if (LIKELY(local_best_sad < best_sad)) { +#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif new_bmv = ((const int_mv *)&v_these_mv_w)[local_best_idx]; +#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__) +#pragma GCC diagnostic pop +#endif new_best_address = ((const uint8_t **)v_blocka)[local_best_idx]; best_sad = local_best_sad; |