diff options
author | Angie Chiang <angiebird@google.com> | 2018-10-17 23:10:50 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2018-10-17 23:10:50 +0000 |
commit | 22d67c346fde9630b9accfdd3b3abe9840a061fe (patch) | |
tree | c909ede261ab68a8ac198450b182a39526a514e7 /vp9/encoder | |
parent | 88030f7d6616db6a764b9d1165245b83947fb769 (diff) | |
parent | 6127de7a7dc05c64277cdf6b531d5588c4842d6e (diff) | |
download | libvpx-22d67c346fde9630b9accfdd3b3abe9840a061fe.tar libvpx-22d67c346fde9630b9accfdd3b3abe9840a061fe.tar.gz libvpx-22d67c346fde9630b9accfdd3b3abe9840a061fe.tar.bz2 libvpx-22d67c346fde9630b9accfdd3b3abe9840a061fe.zip |
Merge changes I6d5c77af,I6bf504b4,Ie5dc5ea7,Ie6024b1a,If45fba8a, ...
* changes:
Add do_motion_search
Preserve code of doing mv search in raster order
Variant implementation of changing mv search order
Add feature_score_loc_sort
Init mv_[dist/cost]_sum in init_tpl_stats
Change mv search order according to feature_score
Diffstat (limited to 'vp9/encoder')
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 221 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 13 |
2 files changed, 209 insertions, 25 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index e902a832a..51b476ef6 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2361,6 +2361,17 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, if (cpi->sf.enable_tpl_model) { const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); +#if CONFIG_NON_GREEDY_MV + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_arr, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr))); + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_sort, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort))); + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_heap, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap))); +#endif // TODO(jingning): Reduce the actual memory use for tpl model build up. for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr, @@ -2577,6 +2588,11 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vp9_denoiser_free(&(cpi->denoiser)); #endif +#if CONFIG_NON_GREEDY_MV + vpx_free(cpi->feature_score_loc_arr); + vpx_free(cpi->feature_score_loc_sort); + vpx_free(cpi->feature_score_loc_heap); +#endif for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); cpi->tpl_stats[frame].is_valid = 0; @@ -5484,6 +5500,13 @@ void init_tpl_stats(VP9_COMP *cpi) { int frame_idx; for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) { TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; +#if CONFIG_NON_GREEDY_MV + int rf_idx; + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + tpl_frame->mv_dist_sum[rf_idx] = 0; + tpl_frame->mv_cost_sum[rf_idx] = 0; + } +#endif memset(tpl_frame->tpl_stats_ptr, 0, tpl_frame->height * tpl_frame->width * sizeof(*tpl_frame->tpl_stats_ptr)); @@ -5947,6 +5970,131 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, tpl_stats->mv.as_int = best_mv.as_int; } +#if CONFIG_NON_GREEDY_MV +static int compare_feature_score(const void *a, const void *b) { + const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a; + const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b; + if (aa->feature_score < bb->feature_score) { + return 1; + } else if (aa->feature_score > bb->feature_score) { + return -1; + } else { + return 0; + } +} + +static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx, + YV12_BUFFER_CONFIG **ref_frame, BLOCK_SIZE bsize, + int mi_row, int mi_col) { + VP9_COMMON *cm = &cpi->common; + MACROBLOCK *x = &td->mb; + MACROBLOCKD *xd = &x->e_mbd; + TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; + TplDepStats *tpl_stats = + &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + const int mb_y_offset = + mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + int rf_idx; + + set_mv_limits(cm, x, mi_row, mi_col); + + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + if (ref_frame[rf_idx] == NULL) { + tpl_stats->ready[rf_idx] = 0; + continue; + } else { + tpl_stats->ready[rf_idx] = 1; + } + motion_compensated_prediction( + cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize, + mi_row, mi_col, tpl_stats, rf_idx); + } +} + +#define CHANGE_MV_SEARCH_ORDER 1 +#define USE_PQSORT 1 + +#if CHANGE_MV_SEARCH_ORDER +#if USE_PQSORT +static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size, + FEATURE_SCORE_LOC **output) { + if (*size > 0) { + *output = heap[0]; + --*size; + if (*size > 0) { + int p, l, r; + heap[0] = heap[*size]; + p = 0; + l = 2 * p + 1; + r = 2 * p + 2; + while (l < *size) { + FEATURE_SCORE_LOC *tmp; + int c = l; + if (r < *size && heap[r]->feature_score > heap[l]->feature_score) { + c = r; + } + if (heap[p]->feature_score >= heap[c]->feature_score) { + break; + } + tmp = heap[p]; + heap[p] = heap[c]; + heap[c] = tmp; + p = c; + l = 2 * p + 1; + r = 2 * p + 2; + } + } + } else { + assert(0); + } +} + +static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size, + FEATURE_SCORE_LOC *input) { + int c, p; + FEATURE_SCORE_LOC *tmp; + heap[*size] = input; + ++*size; + c = *size - 1; + p = c >> 1; + while (c > 0) { + if (heap[c]->feature_score > heap[p]->feature_score) { + tmp = heap[p]; + heap[p] = heap[c]; + heap[c] = tmp; + c = p; + p = c >> 1; + } else { + break; + } + } +} + +static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int *heap_size) { + const int mi_unit = num_8x8_blocks_wide_lookup[bsize]; + const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; + int i; + for (i = 0; i < NB_MVS_NUM; ++i) { + int r = dirs[i][0] * mi_unit; + int c = dirs[i][1] * mi_unit; + if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 && + mi_col + c < tpl_frame->mi_cols) { + FEATURE_SCORE_LOC *fs_loc = + &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride + + (mi_col + c)]; + if (fs_loc->visited == 0) { + max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc); + } + } + } +} +#endif // USE_PQSORT +#endif // CHANGE_MV_SEARCH_ORDER +#endif // CONFIG_NON_GREEDY_MV + void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, BLOCK_SIZE bsize) { TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; @@ -5979,7 +6127,15 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, int64_t recon_error, sse; #if CONFIG_NON_GREEDY_MV int rf_idx; -#endif + int fs_loc_sort_size; +#if CHANGE_MV_SEARCH_ORDER +#if USE_PQSORT + int fs_loc_heap_size; +#else + int i; +#endif // USE_PQSORT +#endif // CHANGE_MV_SEARCH_ORDER +#endif // CONFIG_NON_GREEDY_MV // Setup scaling factor #if CONFIG_VP9_HIGHBITDEPTH @@ -6023,6 +6179,7 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, #if CONFIG_NON_GREEDY_MV tpl_frame->lambda = 250; + fs_loc_sort_size = 0; for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { @@ -6032,41 +6189,55 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, const int bh = 4 << b_height_log2_lookup[bsize]; TplDepStats *tpl_stats = &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + FEATURE_SCORE_LOC *fs_loc = + &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col]; tpl_stats->feature_score = get_feature_score( xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh); + fs_loc->visited = 0; + fs_loc->feature_score = tpl_stats->feature_score; + fs_loc->mi_row = mi_row; + fs_loc->mi_col = mi_col; + cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc; + ++fs_loc_sort_size; } } - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { - tpl_frame->mv_dist_sum[rf_idx] = 0; - tpl_frame->mv_cost_sum[rf_idx] = 0; + qsort(cpi->feature_score_loc_sort, fs_loc_sort_size, + sizeof(*cpi->feature_score_loc_sort), compare_feature_score); + +#if CHANGE_MV_SEARCH_ORDER +#if !USE_PQSORT + for (i = 0; i < fs_loc_sort_size; ++i) { + FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i]; + do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, + fs_loc->mi_col); } +#else // !USE_PQSORT + fs_loc_heap_size = 0; + max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size, + cpi->feature_score_loc_sort[0]); - for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { - const int mb_y_offset = - mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; - TplDepStats *tpl_stats = - &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + while (fs_loc_heap_size > 0) { + FEATURE_SCORE_LOC *fs_loc; + max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc); - set_mv_limits(cm, x, mi_row, mi_col); + fs_loc->visited = 1; - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { - if (ref_frame[rf_idx] == NULL) { - tpl_stats->ready[rf_idx] = 0; - continue; - } else { - tpl_stats->ready[rf_idx] = 1; - } - motion_compensated_prediction( - cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset, - ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, - bsize, mi_row, mi_col, tpl_stats, rf_idx); - } + do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, + fs_loc->mi_col); + + add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col, + &fs_loc_heap_size); + } +#endif // !USE_PQSORT +#else // CHANGE_MV_SEARCH_ORDER + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { + do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col); } } - -#endif +#endif // CHANGE_MV_SEARCH_ORDER +#endif // CONFIG_NON_GREEDY_MV for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame, diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 9e58f2bb0..07289588c 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -503,6 +503,14 @@ typedef struct EncFrameBuf { // Maximum operating frame buffer size needed for a GOP using ARF reference. #define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS) +#if CONFIG_NON_GREEDY_MV +typedef struct FEATURE_SCORE_LOC { + int visited; + double feature_score; + int mi_row; + int mi_col; +} FEATURE_SCORE_LOC; +#endif typedef struct VP9_COMP { QUANTS quants; @@ -530,6 +538,11 @@ typedef struct VP9_COMP { TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE]; YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES]; EncFrameBuf enc_frame_buf[REF_FRAMES]; +#if CONFIG_NON_GREEDY_MV + FEATURE_SCORE_LOC *feature_score_loc_arr; + FEATURE_SCORE_LOC **feature_score_loc_sort; + FEATURE_SCORE_LOC **feature_score_loc_heap; +#endif TileDataEnc *tile_data; int allocated_tiles; // Keep track of memory allocated for tiles. |