diff options
author | Angie Chiang <angiebird@google.com> | 2018-10-16 12:31:13 -0700 |
---|---|---|
committer | Angie Chiang <angiebird@google.com> | 2018-10-17 09:03:33 -0700 |
commit | d3c78fd71ffea6c55f2705de2eb0c92449c4fc82 (patch) | |
tree | e1fa396daf5647f348f3b48e90550a9e0cd02f24 | |
parent | 534e61d5a3fc121bfcc52969acc4c9e7ac798d14 (diff) | |
download | libvpx-d3c78fd71ffea6c55f2705de2eb0c92449c4fc82.tar libvpx-d3c78fd71ffea6c55f2705de2eb0c92449c4fc82.tar.gz libvpx-d3c78fd71ffea6c55f2705de2eb0c92449c4fc82.tar.bz2 libvpx-d3c78fd71ffea6c55f2705de2eb0c92449c4fc82.zip |
Variant implementation of changing mv search order
We start mv search from the block with highest feature score, then
move on to the block's neighbors with with an searching order using
their feature scores.
We use max heap to help us achieve the functionality.
This feature is under flag USE_PQSORT
Change-Id: Ie5dc5ea715b0f9a7a594e5080a7cb4f5309f5597
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 134 | ||||
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 2 |
2 files changed, 132 insertions, 4 deletions
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 2c5dc4f19..ecb4d944c 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2368,6 +2368,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, CHECK_MEM_ERROR( cm, cpi->feature_score_loc_sort, vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort))); + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_heap, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap))); #endif // TODO(jingning): Reduce the actual memory use for tpl model build up. for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { @@ -2588,6 +2591,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) { #if CONFIG_NON_GREEDY_MV vpx_free(cpi->feature_score_loc_arr); vpx_free(cpi->feature_score_loc_sort); + vpx_free(cpi->feature_score_loc_heap); #endif for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); @@ -5967,7 +5971,7 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, } #if CONFIG_NON_GREEDY_MV -int compare_feature_score(const void *a, const void *b) { +static int compare_feature_score(const void *a, const void *b) { const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a; const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b; if (aa->feature_score < bb->feature_score) { @@ -5978,7 +5982,86 @@ int compare_feature_score(const void *a, const void *b) { return 0; } } -#endif + +#define USE_PQSORT 1 + +#if USE_PQSORT +static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size, + FEATURE_SCORE_LOC **output) { + if (*size > 0) { + *output = heap[0]; + --*size; + if (*size > 0) { + int p, l, r; + heap[0] = heap[*size]; + p = 0; + l = 2 * p + 1; + r = 2 * p + 2; + while (l < *size) { + FEATURE_SCORE_LOC *tmp; + int c = l; + if (r < *size && heap[r]->feature_score > heap[l]->feature_score) { + c = r; + } + if (heap[p]->feature_score >= heap[c]->feature_score) { + break; + } + tmp = heap[p]; + heap[p] = heap[c]; + heap[c] = tmp; + p = c; + l = 2 * p + 1; + r = 2 * p + 2; + } + } + } else { + assert(0); + } +} + +static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size, + FEATURE_SCORE_LOC *input) { + int c, p; + FEATURE_SCORE_LOC *tmp; + heap[*size] = input; + ++*size; + c = *size - 1; + p = c >> 1; + while (c > 0) { + if (heap[c]->feature_score > heap[p]->feature_score) { + tmp = heap[p]; + heap[p] = heap[c]; + heap[c] = tmp; + c = p; + p = c >> 1; + } else { + break; + } + } +} + +static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int *heap_size) { + const int mi_unit = num_8x8_blocks_wide_lookup[bsize]; + const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; + int i; + for (i = 0; i < NB_MVS_NUM; ++i) { + int r = dirs[i][0] * mi_unit; + int c = dirs[i][1] * mi_unit; + if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 && + mi_col + c < tpl_frame->mi_cols) { + FEATURE_SCORE_LOC *fs_loc = + &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride + + (mi_col + c)]; + if (fs_loc->visited == 0) { + max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc); + } + } + } +} +#endif // USE_PQSORT +#endif // CONFIG_NON_GREEDY_MV void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, BLOCK_SIZE bsize) { @@ -6013,8 +6096,12 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, #if CONFIG_NON_GREEDY_MV int rf_idx; int fs_loc_sort_size; +#if USE_PQSORT + int fs_loc_heap_size; +#else int i; -#endif +#endif // USE_PQSORT +#endif // CONFIG_NON_GREEDY_MV // Setup scaling factor #if CONFIG_VP9_HIGHBITDEPTH @@ -6072,6 +6159,7 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col]; tpl_stats->feature_score = get_feature_score( xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh); + fs_loc->visited = 0; fs_loc->feature_score = tpl_stats->feature_score; fs_loc->mi_row = mi_row; fs_loc->mi_col = mi_col; @@ -6083,6 +6171,7 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, qsort(cpi->feature_score_loc_sort, fs_loc_sort_size, sizeof(*cpi->feature_score_loc_sort), compare_feature_score); +#if !USE_PQSORT for (i = 0; i < fs_loc_sort_size; ++i) { int mb_y_offset; TplDepStats *tpl_stats; @@ -6108,8 +6197,45 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, bsize, mi_row, mi_col, tpl_stats, rf_idx); } } +#else // !USE_PQSORT + fs_loc_heap_size = 0; + max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size, + cpi->feature_score_loc_sort[0]); -#endif + while (fs_loc_heap_size > 0) { + int mb_y_offset; + TplDepStats *tpl_stats; + FEATURE_SCORE_LOC *fs_loc; + + max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc); + + fs_loc->visited = 1; + + mi_row = fs_loc->mi_row; + mi_col = fs_loc->mi_col; + + mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + tpl_stats = &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + + set_mv_limits(cm, x, mi_row, mi_col); + + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + if (ref_frame[rf_idx] == NULL) { + tpl_stats->ready[rf_idx] = 0; + continue; + } else { + tpl_stats->ready[rf_idx] = 1; + } + motion_compensated_prediction( + cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, + bsize, mi_row, mi_col, tpl_stats, rf_idx); + } + add_nb_blocks_to_heap(cpi, tpl_frame, bsize, mi_row, mi_col, + &fs_loc_heap_size); + } +#endif // !USE_PQSORT +#endif // CONFIG_NON_GREEDY_MV for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame, diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index a800ab943..07289588c 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -505,6 +505,7 @@ typedef struct EncFrameBuf { #define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS) #if CONFIG_NON_GREEDY_MV typedef struct FEATURE_SCORE_LOC { + int visited; double feature_score; int mi_row; int mi_col; @@ -540,6 +541,7 @@ typedef struct VP9_COMP { #if CONFIG_NON_GREEDY_MV FEATURE_SCORE_LOC *feature_score_loc_arr; FEATURE_SCORE_LOC **feature_score_loc_sort; + FEATURE_SCORE_LOC **feature_score_loc_heap; #endif TileDataEnc *tile_data; |