diff options
Diffstat (limited to 'vp9')
-rw-r--r-- | vp9/common/vp9_blockd.h | 4 | ||||
-rw-r--r-- | vp9/common/vp9_thread_common.c | 6 | ||||
-rw-r--r-- | vp9/decoder/vp9_decodeframe.c | 382 | ||||
-rw-r--r-- | vp9/decoder/vp9_decoder.c | 76 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 15 | ||||
-rw-r--r-- | vp9/encoder/vp9_firstpass.c | 20 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.c | 48 | ||||
-rw-r--r-- | vp9/encoder/vp9_mcomp.h | 17 | ||||
-rw-r--r-- | vp9/encoder/vp9_pickmode.c | 6 | ||||
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 19 | ||||
-rw-r--r-- | vp9/encoder/vp9_rd.c | 23 | ||||
-rw-r--r-- | vp9/encoder/vp9_rdopt.c | 74 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.c | 40 | ||||
-rw-r--r-- | vp9/encoder/vp9_speed_features.h | 14 |
14 files changed, 614 insertions, 130 deletions
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 65db065a6..f0887157e 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -131,6 +131,8 @@ struct macroblockd_plane { // encoder const int16_t *dequant; + + int *eob; }; #define BLOCK_OFFSET(x, i) ((x) + (i)*16) @@ -194,6 +196,8 @@ typedef struct macroblockd { int corrupted; struct vpx_internal_error_info *error_info; + + PARTITION_TYPE *partition; } MACROBLOCKD; static INLINE PLANE_TYPE get_plane_type(int plane) { diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c index 36530fae6..ba9aa69d0 100644 --- a/vp9/common/vp9_thread_common.c +++ b/vp9/common/vp9_thread_common.c @@ -8,6 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <limits.h> #include "./vpx_config.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" @@ -402,6 +403,11 @@ static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]); pthread_mutex_lock(&lf_sync->lf_mutex); if (lf_sync->corrupted) { + int row = return_val >> MI_BLOCK_SIZE_LOG2; + pthread_mutex_lock(&lf_sync->mutex[row]); + lf_sync->cur_sb_col[row] = INT_MAX; + pthread_cond_signal(&lf_sync->cond[row]); + pthread_mutex_unlock(&lf_sync->mutex[row]); return_val = -1; } pthread_mutex_unlock(&lf_sync->lf_mutex); diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index bc0fc6197..c9c85053d 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -45,6 +45,12 @@ #define MAX_VP9_HEADER_SIZE 80 +typedef int (*predict_recon_func)(TileWorkerData *twd, MODE_INFO *const mi, + int plane, int row, int col, TX_SIZE tx_size); + +typedef void (*intra_recon_func)(TileWorkerData *twd, MODE_INFO *const mi, + int plane, int row, int col, TX_SIZE tx_size); + static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) { return len != 0 && len <= (size_t)(end - start); } @@ -325,6 +331,59 @@ static void predict_and_reconstruct_intra_block(TileWorkerData *twd, } } +static void parse_intra_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi, + int plane, int row, int col, + TX_SIZE tx_size) { + MACROBLOCKD *const xd = &twd->xd; + PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; + + if (mi->sb_type < BLOCK_8X8) + if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; + + if (!mi->skip) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_TYPE tx_type = + (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode]; + const scan_order *sc = (plane || xd->lossless) + ? &vp9_default_scan_orders[tx_size] + : &vp9_scan_orders[tx_size][tx_type]; + *pd->eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, + mi->segment_id); + /* Keep the alignment to 16 */ + pd->dqcoeff += (16 << (tx_size << 1)); + pd->eob++; + } +} + +static void predict_and_reconstruct_intra_block_row_mt(TileWorkerData *twd, + MODE_INFO *const mi, + int plane, int row, + int col, + TX_SIZE tx_size) { + MACROBLOCKD *const xd = &twd->xd; + struct macroblockd_plane *const pd = &xd->plane[plane]; + PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; + uint8_t *dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; + + if (mi->sb_type < BLOCK_8X8) + if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; + + vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode, dst, pd->dst.stride, + dst, pd->dst.stride, col, row, plane); + + if (!mi->skip) { + const TX_TYPE tx_type = + (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode]; + if (*pd->eob > 0) { + inverse_transform_block_intra(xd, plane, tx_type, tx_size, dst, + pd->dst.stride, *pd->eob); + } + /* Keep the alignment to 16 */ + pd->dqcoeff += (16 << (tx_size << 1)); + pd->eob++; + } +} + static int reconstruct_inter_block(TileWorkerData *twd, MODE_INFO *const mi, int plane, int row, int col, TX_SIZE tx_size) { @@ -342,6 +401,41 @@ static int reconstruct_inter_block(TileWorkerData *twd, MODE_INFO *const mi, return eob; } +static int parse_inter_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi, + int plane, int row, int col, + TX_SIZE tx_size) { + MACROBLOCKD *const xd = &twd->xd; + struct macroblockd_plane *const pd = &xd->plane[plane]; + const scan_order *sc = &vp9_default_scan_orders[tx_size]; + const int eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, + mi->segment_id); + + *pd->eob = eob; + pd->dqcoeff += (16 << (tx_size << 1)); + pd->eob++; + + return eob; +} + +static int reconstruct_inter_block_row_mt(TileWorkerData *twd, + MODE_INFO *const mi, int plane, + int row, int col, TX_SIZE tx_size) { + MACROBLOCKD *const xd = &twd->xd; + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int eob = *pd->eob; + + (void)mi; + if (eob > 0) { + inverse_transform_block_inter( + xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], + pd->dst.stride, eob); + } + pd->dqcoeff += (16 << (tx_size << 1)); + pd->eob++; + + return eob; +} + static void build_mc_border(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int x, int y, int b_w, int b_h, int w, int h) { @@ -689,6 +783,25 @@ static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl, } } +static MODE_INFO *set_offsets_recon(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int mi_row, int mi_col, int bw, int bh, + int bwl, int bhl) { + const int offset = mi_row * cm->mi_stride + mi_col; + const TileInfo *const tile = &xd->tile; + xd->mi = cm->mi_grid_visible + offset; + + set_plane_n4(xd, bw, bh, bwl, bhl); + + set_skip_context(xd, mi_row, mi_col); + + // Distance of Mb to the various image edges. These are specified to 8th pel + // as they are always compared to values that are in 1/8th pel units + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); + return xd->mi[0]; +} + static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, BLOCK_SIZE bsize, int mi_row, int mi_col, int bw, int bh, int x_mis, int y_mis, int bwl, int bhl) { @@ -718,6 +831,66 @@ static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, return xd->mi[0]; } +static INLINE int predict_recon_inter(MACROBLOCKD *xd, MODE_INFO *mi, + TileWorkerData *twd, + predict_recon_func func) { + int eobtotal = 0; + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int step = (1 << tx_size); + int row, col; + const int max_blocks_wide = + num_4x4_w + (xd->mb_to_right_edge >= 0 + ? 0 + : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = + num_4x4_h + (xd->mb_to_bottom_edge >= 0 + ? 0 + : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + eobtotal += func(twd, mi, plane, row, col, tx_size); + } + return eobtotal; +} + +static INLINE void predict_recon_intra(MACROBLOCKD *xd, MODE_INFO *mi, + TileWorkerData *twd, + intra_recon_func func) { + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int step = (1 << tx_size); + int row, col; + const int max_blocks_wide = + num_4x4_w + (xd->mb_to_right_edge >= 0 + ? 0 + : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = + num_4x4_h + (xd->mb_to_bottom_edge >= 0 + ? 0 + : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + func(twd, mi, plane, row, col, tx_size); + } +} + static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) { VP9_COMMON *const cm = &pbi->common; @@ -818,6 +991,81 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, } } +static void recon_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, + int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) { + VP9_COMMON *const cm = &pbi->common; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + MACROBLOCKD *const xd = &twd->xd; + + MODE_INFO *mi = set_offsets_recon(cm, xd, mi_row, mi_col, bw, bh, bwl, bhl); + + if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { + const BLOCK_SIZE uv_subsize = + ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; + if (uv_subsize == BLOCK_INVALID) + vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME, + "Invalid block size."); + } + + if (!is_inter_block(mi)) { + predict_recon_intra(xd, mi, twd, + predict_and_reconstruct_intra_block_row_mt); + } else { + // Prediction + dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); + + // Reconstruction + if (!mi->skip) { + predict_recon_inter(xd, mi, twd, reconstruct_inter_block_row_mt); + } + } + + vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh); +} + +static void parse_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, + int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) { + VP9_COMMON *const cm = &pbi->common; + const int less8x8 = bsize < BLOCK_8X8; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); + vpx_reader *r = &twd->bit_reader; + MACROBLOCKD *const xd = &twd->xd; + + MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, + y_mis, bwl, bhl); + + if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { + const BLOCK_SIZE uv_subsize = + ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; + if (uv_subsize == BLOCK_INVALID) + vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME, + "Invalid block size."); + } + + vp9_read_mode_info(twd, pbi, mi_row, mi_col, x_mis, y_mis); + + if (mi->skip) { + dec_reset_skip_context(xd); + } + + if (!is_inter_block(mi)) { + predict_recon_intra(xd, mi, twd, parse_intra_block_row_mt); + } else { + if (!mi->skip) { + const int eobtotal = + predict_recon_inter(xd, mi, twd, parse_inter_block_row_mt); + + if (!less8x8 && eobtotal == 0) mi->skip = 1; // skip loopfilter + } + } + + xd->corrupted |= vpx_reader_has_error(r); +} + static INLINE int dec_partition_plane_context(TileWorkerData *twd, int mi_row, int mi_col, int bsl) { const PARTITION_CONTEXT *above_ctx = twd->xd.above_seg_context + mi_col; @@ -924,6 +1172,118 @@ static void decode_partition(TileWorkerData *twd, VP9Decoder *const pbi, dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh); } +static void recon_partition(TileWorkerData *twd, VP9Decoder *const pbi, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int n4x4_l2) { + VP9_COMMON *const cm = &pbi->common; + const int n8x8_l2 = n4x4_l2 - 1; + const int num_8x8_wh = 1 << n8x8_l2; + const int hbs = num_8x8_wh >> 1; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + const int has_rows = (mi_row + hbs) < cm->mi_rows; + const int has_cols = (mi_col + hbs) < cm->mi_cols; + MACROBLOCKD *const xd = &twd->xd; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; + + partition = *xd->partition; + xd->partition++; + + subsize = get_subsize(bsize, partition); + if (!hbs) { + // calculate bmode block dimensions (log 2) + xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); + xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); + recon_block(twd, pbi, mi_row, mi_col, subsize, 1, 1); + } else { + switch (partition) { + case PARTITION_NONE: + recon_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2); + break; + case PARTITION_HORZ: + recon_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2); + if (has_rows) + recon_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2, + n8x8_l2); + break; + case PARTITION_VERT: + recon_block(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2); + if (has_cols) + recon_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2, + n4x4_l2); + break; + case PARTITION_SPLIT: + recon_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2); + recon_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2); + recon_partition(twd, pbi, mi_row + hbs, mi_col, subsize, n8x8_l2); + recon_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize, n8x8_l2); + break; + default: assert(0 && "Invalid partition type"); + } + } +} + +static void parse_partition(TileWorkerData *twd, VP9Decoder *const pbi, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int n4x4_l2) { + VP9_COMMON *const cm = &pbi->common; + const int n8x8_l2 = n4x4_l2 - 1; + const int num_8x8_wh = 1 << n8x8_l2; + const int hbs = num_8x8_wh >> 1; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + const int has_rows = (mi_row + hbs) < cm->mi_rows; + const int has_cols = (mi_col + hbs) < cm->mi_cols; + MACROBLOCKD *const xd = &twd->xd; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; + + *xd->partition = + read_partition(twd, mi_row, mi_col, has_rows, has_cols, n8x8_l2); + + partition = *xd->partition; + xd->partition++; + + subsize = get_subsize(bsize, partition); + if (!hbs) { + // calculate bmode block dimensions (log 2) + xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); + xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); + parse_block(twd, pbi, mi_row, mi_col, subsize, 1, 1); + } else { + switch (partition) { + case PARTITION_NONE: + parse_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2); + break; + case PARTITION_HORZ: + parse_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2); + if (has_rows) + parse_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2, + n8x8_l2); + break; + case PARTITION_VERT: + parse_block(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2); + if (has_cols) + parse_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2, + n4x4_l2); + break; + case PARTITION_SPLIT: + parse_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2); + parse_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2); + parse_partition(twd, pbi, mi_row + hbs, mi_col, subsize, n8x8_l2); + parse_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize, n8x8_l2); + break; + default: assert(0 && "Invalid partition type"); + } + } + + // update partition context + if (bsize >= BLOCK_8X8 && + (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) + dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh); +} + static void setup_token_decoder(const uint8_t *data, const uint8_t *data_end, size_t read_size, struct vpx_internal_error_info *error_info, @@ -1406,7 +1766,27 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += MI_BLOCK_SIZE) { - decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); + if (pbi->row_mt == 1) { + int plane; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane]; + tile_data->xd.plane[plane].dqcoeff = + row_mt_worker_data->dqcoeff[plane]; + } + tile_data->xd.partition = row_mt_worker_data->partition; + parse_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane]; + tile_data->xd.plane[plane].dqcoeff = + row_mt_worker_data->dqcoeff[plane]; + } + tile_data->xd.partition = row_mt_worker_data->partition; + recon_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); + } else { + decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); + } } pbi->mb.corrupted |= tile_data->xd.corrupted; if (pbi->mb.corrupted) diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c index 1e2a44293..7fde0b07f 100644 --- a/vp9/decoder/vp9_decoder.c +++ b/vp9/decoder/vp9_decoder.c @@ -302,6 +302,44 @@ static void swap_frame_buffers(VP9Decoder *pbi) { cm->frame_refs[ref_index].idx = -1; } +static void release_fb_on_decoder_exit(VP9Decoder *pbi) { + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + VP9_COMMON *volatile const cm = &pbi->common; + BufferPool *volatile const pool = cm->buffer_pool; + RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs; + int i; + + // Synchronize all threads immediately as a subsequent decode call may + // cause a resize invalidating some allocations. + winterface->sync(&pbi->lf_worker); + for (i = 0; i < pbi->num_tile_workers; ++i) { + winterface->sync(&pbi->tile_workers[i]); + } + + // Release all the reference buffers if worker thread is holding them. + if (pbi->hold_ref_buf == 1) { + int ref_index = 0, mask; + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + const int old_idx = cm->ref_frame_map[ref_index]; + // Current thread releases the holding of reference frame. + decrease_ref_count(old_idx, frame_bufs, pool); + + // Release the reference frame in reference map. + if (mask & 1) { + decrease_ref_count(old_idx, frame_bufs, pool); + } + ++ref_index; + } + + // Current thread releases the holding of reference frame. + for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { + const int old_idx = cm->ref_frame_map[ref_index]; + decrease_ref_count(old_idx, frame_bufs, pool); + } + pbi->hold_ref_buf = 0; + } +} + int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource) { VP9_COMMON *volatile const cm = &pbi->common; @@ -339,6 +377,9 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, // Find a free frame buffer. Return error if can not find any. cm->new_fb_idx = get_free_fb(cm); if (cm->new_fb_idx == INVALID_IDX) { + pbi->ready_for_new_data = 1; + release_fb_on_decoder_exit(pbi); + vpx_clear_system_state(); vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Unable to find free frame buffer"); return cm->error.error_code; @@ -351,44 +392,11 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; if (setjmp(cm->error.jmp)) { - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - int i; - cm->error.setjmp = 0; pbi->ready_for_new_data = 1; - - // Synchronize all threads immediately as a subsequent decode call may - // cause a resize invalidating some allocations. - winterface->sync(&pbi->lf_worker); - for (i = 0; i < pbi->num_tile_workers; ++i) { - winterface->sync(&pbi->tile_workers[i]); - } - - // Release all the reference buffers if worker thread is holding them. - if (pbi->hold_ref_buf == 1) { - int ref_index = 0, mask; - for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { - const int old_idx = cm->ref_frame_map[ref_index]; - // Current thread releases the holding of reference frame. - decrease_ref_count(old_idx, frame_bufs, pool); - - // Release the reference frame in reference map. - if (mask & 1) { - decrease_ref_count(old_idx, frame_bufs, pool); - } - ++ref_index; - } - - // Current thread releases the holding of reference frame. - for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { - const int old_idx = cm->ref_frame_map[ref_index]; - decrease_ref_count(old_idx, frame_bufs, pool); - } - pbi->hold_ref_buf = 0; - } + release_fb_on_decoder_exit(pbi); // Release current frame. decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); - vpx_clear_system_state(); return -1; } diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index a73185623..1b3010c62 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -3873,6 +3873,9 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, set_size_independent_vars(cpi); set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); + // search method and step parameter might be changed in speed settings. + init_motion_estimation(cpi); + if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi); if (cpi->sf.svc_use_lowres_part && @@ -5598,8 +5601,7 @@ static void prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row, &tpl_frame ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c]; if (tpl_ptr->ready[rf_idx]) { - nb_full_mvs[i].as_mv.row = tpl_ptr->mv_arr[rf_idx].as_mv.row >> 3; - nb_full_mvs[i].as_mv.col = tpl_ptr->mv_arr[rf_idx].as_mv.col >> 3; + nb_full_mvs[i].as_mv = get_full_mv(&tpl_ptr->mv_arr[rf_idx].as_mv); } else { nb_full_mvs[i].as_int = INVALID_MV; } @@ -5666,7 +5668,7 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, nb_full_mvs); vp9_full_pixel_diamond_new( cpi, x, &best_ref_mv1_full, step_param, lambda, 1, &cpi->fn_ptr[bsize], - nb_full_mvs, &tpl_stats->mv_arr[rf_idx].as_mv, + nb_full_mvs, NB_MVS_NUM, &tpl_stats->mv_arr[rf_idx].as_mv, &tpl_stats->mv_dist[rf_idx], &tpl_stats->mv_cost[rf_idx]); #else (void)frame_idx; @@ -5973,8 +5975,7 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride, xd->bd); highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); - // TODO(sdeng): Implement SIMD based high bit-depth satd. - intra_cost = vpx_satd_c(coeff, pix_num); + intra_cost = vpx_highbd_satd(coeff, pix_num); } else { vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride); @@ -6020,7 +6021,7 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw, xd->bd); highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); - inter_cost = vpx_satd_c(coeff, pix_num); + inter_cost = vpx_highbd_satd(coeff, pix_num); } else { vp9_build_inter_predictor( ref_frame[rf_idx]->y_buffer + mb_y_offset, @@ -6361,7 +6362,7 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, full_mv.row = this_tpl_stats->mv_arr[rf_idx].as_mv.row >> 3; full_mv.col = this_tpl_stats->mv_arr[rf_idx].as_mv.col >> 3; this_tpl_stats->mv_cost[rf_idx] = - av1_nb_mvs_inconsistency(&full_mv, nb_full_mvs); + vp9_nb_mvs_inconsistency(&full_mv, nb_full_mvs, NB_MVS_NUM); #endif // RE_COMPUTE_MV_INCONSISTENCY tpl_frame->mv_dist_sum[rf_idx] += this_tpl_stats->mv_dist[rf_idx]; tpl_frame->mv_cost_sum[rf_idx] += this_tpl_stats->mv_cost[rf_idx]; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index e29e86576..30fd842a1 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -1037,23 +1037,28 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, // Other than for the first frame do a motion search. if (cm->current_video_frame > 0) { - int tmp_err, motion_error, raw_motion_error; + int tmp_err, motion_error, this_motion_error, raw_motion_error; // Assume 0,0 motion with no mv overhead. MV mv = { 0, 0 }, tmp_mv = { 0, 0 }; struct buf_2d unscaled_last_source_buf_2d; + vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { motion_error = highbd_get_prediction_error( bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd); + this_motion_error = highbd_get_prediction_error( + bsize, &x->plane[0].src, &xd->plane[0].pre[0], 8); } else { motion_error = get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]); + this_motion_error = motion_error; } #else motion_error = get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]); + this_motion_error = motion_error; #endif // CONFIG_VP9_HIGHBITDEPTH // Compute the motion error of the 0,0 motion using the last source @@ -1080,6 +1085,15 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, // starting point (best reference) for the search. first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error); + v_fn_ptr.vf = get_block_variance_fn(bsize); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, 8); + } +#endif // CONFIG_VP9_HIGHBITDEPTH + this_motion_error = + vp9_get_mvpred_var(x, &mv, best_ref_mv, &v_fn_ptr, 0); + // If the current best reference mv is not centered on 0,0 then do a // 0,0 based search as well. if (!is_zero_mv(best_ref_mv)) { @@ -1089,6 +1103,8 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, if (tmp_err < motion_error) { motion_error = tmp_err; mv = tmp_mv; + this_motion_error = + vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0); } } @@ -1275,7 +1291,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH); if (this_intra_error < scaled_low_intra_thresh) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); - if (motion_error < scaled_low_intra_thresh) { + if (this_motion_error < scaled_low_intra_thresh) { fp_acc_data->intra_count_low += 1.0; } else { fp_acc_data->intra_count_high += 1.0; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 235f0345e..316227e3c 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -1733,12 +1733,13 @@ static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, } #if CONFIG_NON_GREEDY_MV -double av1_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs) { +double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, + int mv_num) { int i; int update = 0; double best_cost = 0; vpx_clear_system_state(); - for (i = 0; i < NB_MVS_NUM; ++i) { + for (i = 0; i < mv_num; ++i) { if (nb_mvs[i].as_int != INVALID_MV) { MV nb_mv = nb_mvs[i].as_mv; const double row_diff = mv->row - nb_mv.row; @@ -1762,7 +1763,7 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, double *best_mv_dist, double *best_mv_cost, int search_param, double lambda, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs) { + const int_mv *nb_full_mvs, int full_mv_num) { int i, j, step; const MACROBLOCKD *const xd = &x->e_mbd; @@ -1799,7 +1800,8 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, // Check the starting position *best_mv_dist = fn_ptr->sdf(what, what_stride, in_what, in_what_stride); - *best_mv_cost = av1_nb_mvs_inconsistency(best_full_mv, nb_full_mvs); + *best_mv_cost = + vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); bestsad = (*best_mv_dist) + lambda * (*best_mv_cost); i = 0; @@ -1833,7 +1835,7 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, best_full_mv->col + ss_mv[i].col }; const double mv_dist = sad_array[t]; const double mv_cost = - av1_nb_mvs_inconsistency(&this_mv, nb_full_mvs); + vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); double thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; @@ -1854,7 +1856,7 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, const double mv_dist = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); const double mv_cost = - av1_nb_mvs_inconsistency(&this_mv, nb_full_mvs); + vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); double thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; @@ -2242,16 +2244,17 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, double lambda, int do_refine, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, MV *best_mv, - double *best_mv_dist, double *best_mv_cost) { + const int_mv *nb_full_mvs, int full_mv_num, + MV *best_mv, double *best_mv_dist, + double *best_mv_cost) { int n, num00 = 0; double thissme; double bestsme; const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; vpx_clear_system_state(); - bestsme = vp9_diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, - best_mv_dist, best_mv_cost, step_param, - lambda, &n, fn_ptr, nb_full_mvs); + bestsme = vp9_diamond_search_sad_new( + x, &cpi->ss_cfg, mvp_full, best_mv, best_mv_dist, best_mv_cost, + step_param, lambda, &n, fn_ptr, nb_full_mvs, full_mv_num); // If there won't be more n-step search, check to see if refining search is // needed. @@ -2265,9 +2268,9 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV temp_mv; double mv_dist; double mv_cost; - thissme = vp9_diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv, - &mv_dist, &mv_cost, step_param + n, - lambda, &num00, fn_ptr, nb_full_mvs); + thissme = vp9_diamond_search_sad_new( + x, &cpi->ss_cfg, mvp_full, &temp_mv, &mv_dist, &mv_cost, + step_param + n, lambda, &num00, fn_ptr, nb_full_mvs, full_mv_num); // check to see if refining search is needed. if (num00 > further_steps - n) do_refine = 0; @@ -2286,9 +2289,9 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV temp_mv = *best_mv; double mv_dist; double mv_cost; - thissme = - vp9_refining_search_sad_new(x, &temp_mv, &mv_dist, &mv_cost, lambda, - search_range, fn_ptr, nb_full_mvs); + thissme = vp9_refining_search_sad_new(x, &temp_mv, &mv_dist, &mv_cost, + lambda, search_range, fn_ptr, + nb_full_mvs, full_mv_num); if (thissme < bestsme) { bestsme = thissme; *best_mv = temp_mv; @@ -2428,7 +2431,7 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, double *best_mv_dist, double *best_mv_cost, double lambda, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs) { + const int_mv *nb_full_mvs, int full_mv_num) { const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; const struct buf_2d *const what = &x->plane[0].src; @@ -2439,7 +2442,8 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, vpx_clear_system_state(); *best_mv_dist = fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride); - *best_mv_cost = av1_nb_mvs_inconsistency(best_full_mv, nb_full_mvs); + *best_mv_cost = + vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); best_sad = (*best_mv_dist) + lambda * (*best_mv_cost); for (i = 0; i < search_range; i++) { @@ -2461,7 +2465,8 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, const MV mv = { best_full_mv->row + neighbors[j].row, best_full_mv->col + neighbors[j].col }; const double mv_dist = sads[j]; - const double mv_cost = av1_nb_mvs_inconsistency(&mv, nb_full_mvs); + const double mv_cost = + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); const double thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { best_sad = thissad; @@ -2479,7 +2484,8 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, const double mv_dist = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride); - const double mv_cost = av1_nb_mvs_inconsistency(&mv, nb_full_mvs); + const double mv_cost = + vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); const double thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { best_sad = thissad; diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index 54f68ca74..6d89fdfdd 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -126,16 +126,23 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, double *best_mv_dist, double *best_mv_cost, double lambda, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs); + const int_mv *nb_full_mvs, int full_mv_num); double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, double lambda, int do_refine, const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, MV *best_mv, - double *best_mv_dist, double *best_mv_cost); - -double av1_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs); + const int_mv *nb_full_mvs, int full_mv_num, + MV *best_mv, double *best_mv_dist, + double *best_mv_cost); + +double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, int mv_num); +static INLINE MV get_full_mv(const MV *mv) { + MV out_mv; + out_mv.row = mv->row >> 3; + out_mv.col = mv->col >> 3; + return out_mv; +} #endif // CONFIG_NON_GREEDY_MV #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index fe8f24444..babfe4a33 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -233,10 +233,10 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } if (rv && search_subpel) { - int subpel_force_stop = cpi->sf.mv.subpel_force_stop; - if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2; + SUBPEL_FORCE_STOP subpel_force_stop = cpi->sf.mv.subpel_force_stop; + if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = HALF_PEL; if (cpi->sf.mv.enable_adaptive_subpel_force_stop) { - int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh; + const int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh; if (abs(tmp_mv->as_mv.row) >= mv_thresh || abs(tmp_mv->as_mv.col) >= mv_thresh) subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 52b14ea31..5ad68e2e5 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -1412,6 +1412,10 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, int active_worst_quality = cpi->twopass.active_worst_quality; int q; int *inter_minq; + const int boost_frame = + !rc->is_src_frame_alt_ref && + (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame); + ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq); if (oxcf->rc_mode == VPX_Q) @@ -1419,8 +1423,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, if (frame_is_intra_only(cm)) { pick_kf_q_bound_two_pass(cpi, &active_best_quality, &active_worst_quality); - } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { + } else if (boost_frame) { // Use the lower of active_worst_quality and recent // average Q as basis for GF/ARF best Q limit unless last frame was // a key frame. @@ -1464,9 +1467,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, // Extension to max or min Q if undershoot or overshoot is outside // the permitted range. - if (frame_is_intra_only(cm) || - (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { + if (frame_is_intra_only(cm) || boost_frame) { active_best_quality -= (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast); active_worst_quality += (cpi->twopass.extend_maxq / 2); @@ -1474,13 +1475,9 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, active_best_quality -= (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2; active_worst_quality += cpi->twopass.extend_maxq; - } - // For normal frames do not allow an active minq lower than the q used for - // the last boosted frame. - if (!frame_is_intra_only(cm) && - (!(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) || - rc->is_src_frame_alt_ref)) { + // For normal frames do not allow an active minq lower than the q used for + // the last boosted frame. active_best_quality = VPXMAX(active_best_quality, rc->last_boosted_qindex); } diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 18b74f57b..c01e5f81b 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -176,8 +176,27 @@ static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128, int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) { // largest dc_quant is 21387, therefore rdmult should always fit in int32_t const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth); - int rdmult = q * q; - rdmult = rdmult * 3 + (rdmult * 2 / 3); + uint32_t rdmult = q * q; + + if (cpi->common.frame_type != KEY_FRAME) { + if (qindex < 1) + rdmult = rdmult * 3 + (rdmult * 2 / 3); + else if (qindex < 128) + rdmult = rdmult * 4; + else if (qindex < 190) + rdmult = rdmult * 4 + rdmult / 2; + else + rdmult = rdmult * 3; + } else { + if (qindex < 64) + rdmult = rdmult * 4; + else if (qindex <= 128) + rdmult = rdmult * 3 + rdmult / 2; + else if (qindex < 190) + rdmult = rdmult * 4 + rdmult / 2; + else + rdmult = rdmult * 7 + rdmult / 2; + } #if CONFIG_VP9_HIGHBITDEPTH switch (cpi->common.bit_depth) { case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 2e1aa1d30..24c500fbd 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2315,6 +2315,61 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, block_size); } +#if CONFIG_NON_GREEDY_MV +#define MAX_PREV_NB_FULL_MV_NUM 8 +static int find_prev_nb_full_mvs(const VP9_COMMON *cm, const MACROBLOCKD *xd, + int ref_frame, BLOCK_SIZE bsize, int mi_row, + int mi_col, int_mv *nb_full_mvs) { + int i; + const TileInfo *tile = &xd->tile; + int full_mv_num = 0; + assert(bsize >= BLOCK_8X8); + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *mv_ref = &mv_ref_blocks[bsize][i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *nb_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + if (nb_mi->sb_type >= BLOCK_8X8) { + if (nb_mi->ref_frame[0] == ref_frame) { + nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[0].as_mv); + ++full_mv_num; + if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { + return full_mv_num; + } + } else if (nb_mi->ref_frame[1] == ref_frame) { + nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[1].as_mv); + ++full_mv_num; + if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { + return full_mv_num; + } + } + } else { + int j; + for (j = 0; j < 4; ++j) { + // TODO(angiebird): avoid using duplicated mvs + if (nb_mi->ref_frame[0] == ref_frame) { + nb_full_mvs[full_mv_num].as_mv = + get_full_mv(&nb_mi->bmi[j].as_mv[0].as_mv); + ++full_mv_num; + if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { + return full_mv_num; + } + } else if (nb_mi->ref_frame[1] == ref_frame) { + nb_full_mvs[full_mv_num].as_mv = + get_full_mv(&nb_mi->bmi[j].as_mv[1].as_mv); + ++full_mv_num; + if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) { + return full_mv_num; + } + } + } + } + } + } + return full_mv_num; +} +#endif // CONFIG_NON_GREEDY_MV + static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { @@ -2338,11 +2393,12 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, #if CONFIG_NON_GREEDY_MV double mv_dist = 0; double mv_cost = 0; - double lambda = 0; + double lambda = (pw * ph) / 4; double bestsme; - int_mv nb_full_mvs[NB_MVS_NUM]; - // TODO(angiebird): Set nb_full_mvs properly. - vp9_zero(nb_full_mvs); + int_mv nb_full_mvs[MAX_PREV_NB_FULL_MV_NUM]; + + const int nb_full_mv_num = + find_prev_nb_full_mvs(cm, xd, ref, bsize, mi_row, mi_col, nb_full_mvs); #else // CONFIG_NON_GREEDY_MV int bestsme = INT_MAX; int sadpb = x->sadperbit16; @@ -2418,9 +2474,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, mvp_full.row >>= 3; #if CONFIG_NON_GREEDY_MV - bestsme = vp9_full_pixel_diamond_new(cpi, x, &mvp_full, step_param, lambda, 1, - &cpi->fn_ptr[bsize], nb_full_mvs, - &tmp_mv->as_mv, &mv_dist, &mv_cost); + bestsme = vp9_full_pixel_diamond_new( + cpi, x, &mvp_full, step_param, lambda, 1, &cpi->fn_ptr[bsize], + nb_full_mvs, nb_full_mv_num, &tmp_mv->as_mv, &mv_dist, &mv_cost); #else // CONFIG_NON_GREEDY_MV bestsme = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb, @@ -2461,8 +2517,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, #if CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_diamond_new( cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), - lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, &tmp_mv->as_mv, &mv_dist, - &mv_cost); + lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num, &this_mv, + &mv_dist, &mv_cost); #else // CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 9b6c69a73..4b584083b 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -533,7 +533,7 @@ static void set_rt_speed_feature_framesize_independent( sf->adjust_partitioning_from_last_frame = cm->last_frame_type != cm->frame_type || (0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency); - sf->mv.subpel_force_stop = 1; + sf->mv.subpel_force_stop = QUARTER_PEL; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_DC_H_V; sf->intra_uv_mode_mask[i] = INTRA_DC; @@ -710,7 +710,6 @@ static void set_rt_speed_feature_framesize_independent( // For SVC: enable use of lower resolution partition for higher resolution, // only for 3 spatial layers and when config/top resolution is above VGA. // Enable only for non-base temporal layer frames. - // TODO(jianj): Investigate webm:1578 if (cpi->use_svc && cpi->svc.use_partition_reuse && cpi->svc.number_spatial_layers == 3 && cpi->svc.temporal_layer_id > 0 && cpi->oxcf.width * cpi->oxcf.height > 640 * 480) @@ -731,7 +730,7 @@ static void set_rt_speed_feature_framesize_independent( if (cpi->row_mt && cpi->oxcf.max_threads > 1) sf->adaptive_rd_thresh_row_mt = 1; - if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = 3; + if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = FULL_PEL; if (content == VP9E_CONTENT_SCREEN) sf->lpf_pick = LPF_PICK_MINIMAL_LPF; // Only keep INTRA_DC mode for speed 8. if (!is_keyframe) { @@ -767,8 +766,8 @@ static void set_rt_speed_feature_framesize_independent( sf->mv.adapt_subpel_force_stop.mv_thresh = 2; if (cpi->rc.avg_frame_low_motion < 40) sf->mv.adapt_subpel_force_stop.mv_thresh = 1; - sf->mv.adapt_subpel_force_stop.force_stop_below = 1; - sf->mv.adapt_subpel_force_stop.force_stop_above = 2; + sf->mv.adapt_subpel_force_stop.force_stop_below = QUARTER_PEL; + sf->mv.adapt_subpel_force_stop.force_stop_above = HALF_PEL; // Disable partition blocks below 16x16, except for low-resolutions. if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240) sf->disable_16x16part_nonkey = 1; @@ -797,18 +796,11 @@ static void set_rt_speed_feature_framesize_independent( } // Special case for screen content: increase motion search on base spatial // layer when high motion is detected or previous SL0 frame was dropped. - // Avoid speed 5 for as there is an issue with SVC datarate test. - // TODO(marpan/jianj): Investigate issue at speed 5. - if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed > 5 && + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed >= 5 && cpi->svc.spatial_layer_id == 0 && (cpi->rc.high_num_blocks_with_motion || cpi->svc.last_layer_dropped[0])) { sf->mv.search_method = NSTEP; sf->mv.fullpel_search_step_param = 2; - // TODO(marpan/jianj): Investigate issue for lower setting of step_param - // for spatial layers (namely on lower layers). - if (cpi->use_svc && cm->width != cpi->oxcf.width && - cm->height != cpi->oxcf.height) - sf->mv.fullpel_search_step_param = 4; } } @@ -855,12 +847,6 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact && oxcf->max_threads > 1) sf->adaptive_rd_thresh = 0; - - // This is only used in motion vector unit test. - if (cpi->oxcf.motion_vector_unit_test == 1) - cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv; - else if (cpi->oxcf.motion_vector_unit_test == 2) - cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv; } void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { @@ -876,7 +862,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->recode_loop = ALLOW_RECODE_FIRST; sf->mv.subpel_search_method = SUBPEL_TREE; sf->mv.subpel_search_level = 2; - sf->mv.subpel_force_stop = 0; + sf->mv.subpel_force_stop = EIGHTH_PEL; sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf); sf->mv.reduce_first_step_size = 0; sf->coeff_prob_appx_step = 1; @@ -993,7 +979,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->optimize_coefficients = 0; } - if (sf->mv.subpel_force_stop == 3) { + if (sf->mv.subpel_force_stop == FULL_PEL) { // Whole pel only cpi->find_fractional_mv_step = vp9_skip_sub_pixel_tree; } else if (sf->mv.subpel_search_method == SUBPEL_TREE) { @@ -1006,6 +992,12 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_evenmore; } + // This is only used in motion vector unit test. + if (cpi->oxcf.motion_vector_unit_test == 1) + cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv; + else if (cpi->oxcf.motion_vector_unit_test == 2) + cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv; + x->optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1; x->min_partition_size = sf->default_min_partition_size; @@ -1023,10 +1015,4 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact && oxcf->max_threads > 1) sf->adaptive_rd_thresh = 0; - - // This is only used in motion vector unit test. - if (cpi->oxcf.motion_vector_unit_test == 1) - cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv; - else if (cpi->oxcf.motion_vector_unit_test == 2) - cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv; } diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 02673e602..9b09ec474 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -167,15 +167,17 @@ typedef enum { ONE_LOOP_REDUCED = 1 } FAST_COEFF_UPDATE; +typedef enum { EIGHTH_PEL, QUARTER_PEL, HALF_PEL, FULL_PEL } SUBPEL_FORCE_STOP; + typedef struct ADAPT_SUBPEL_FORCE_STOP { // Threshold for full pixel motion vector; int mv_thresh; // subpel_force_stop if full pixel MV is below the threshold. - int force_stop_below; + SUBPEL_FORCE_STOP force_stop_below; // subpel_force_stop if full pixel MV is equal to or above the threshold. - int force_stop_above; + SUBPEL_FORCE_STOP force_stop_above; } ADAPT_SUBPEL_FORCE_STOP; typedef struct MV_SPEED_FEATURES { @@ -200,12 +202,8 @@ typedef struct MV_SPEED_FEATURES { // extensive subpel search. int subpel_search_level; - // Control when to stop subpel search: - // 0: Full subpel search. - // 1: Stop at quarter pixel. - // 2: Stop at half pixel. - // 3: Stop at full pixel. - int subpel_force_stop; + // When to stop subpel motion search. + SUBPEL_FORCE_STOP subpel_force_stop; // If it's enabled, different subpel_force_stop will be used for different MV. int enable_adaptive_subpel_force_stop; |