Diffstat (limited to 'vp9')
-rw-r--r--  vp9/common/vp9_filter.c             18
-rw-r--r--  vp9/common/vp9_filter.h              3
-rw-r--r--  vp9/common/vp9_onyxc_int.h           2
-rw-r--r--  vp9/common/vp9_thread_common.c     157
-rw-r--r--  vp9/common/vp9_thread_common.h      19
-rw-r--r--  vp9/decoder/vp9_decodeframe.c      100
-rw-r--r--  vp9/decoder/vp9_decoder.h            3
-rw-r--r--  vp9/encoder/vp9_block.h              5
-rw-r--r--  vp9/encoder/vp9_encodeframe.c      391
-rw-r--r--  vp9/encoder/vp9_encoder.c           55
-rw-r--r--  vp9/encoder/vp9_encoder.h            1
-rw-r--r--  vp9/encoder/vp9_firstpass.c         98
-rw-r--r--  vp9/encoder/vp9_firstpass.h         27
-rw-r--r--  vp9/encoder/vp9_mbgraph.c            3
-rw-r--r--  vp9/encoder/vp9_mcomp.c            223
-rw-r--r--  vp9/encoder/vp9_mcomp.h              2
-rw-r--r--  vp9/encoder/vp9_partition_models.h 306
-rw-r--r--  vp9/encoder/vp9_pickmode.c           9
-rw-r--r--  vp9/encoder/vp9_ratectrl.c          28
-rw-r--r--  vp9/encoder/vp9_rdopt.c             12
-rw-r--r--  vp9/encoder/vp9_speed_features.c    20
-rw-r--r--  vp9/encoder/vp9_speed_features.h    29
-rw-r--r--  vp9/encoder/vp9_temporal_filter.c    5
-rw-r--r--  vp9/vp9_dx_iface.c                  11
-rw-r--r--  vp9/vp9_dx_iface.h                   1
25 files changed, 1347 insertions(+), 181 deletions(-)
diff --git a/vp9/common/vp9_filter.c b/vp9/common/vp9_filter.c
index 6c43af8ce..cadae6f2e 100644
--- a/vp9/common/vp9_filter.c
+++ b/vp9/common/vp9_filter.c
@@ -63,6 +63,20 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 }
};
-const InterpKernel *vp9_filter_kernels[4] = {
- sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters
+// 4-tap filter
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_4[SUBPEL_SHIFTS]) = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -3, 125, 8, -2, 0, 0 },
+ { 0, 0, -6, 120, 18, -4, 0, 0 }, { 0, 0, -8, 115, 27, -6, 0, 0 },
+ { 0, 0, -10, 108, 37, -7, 0, 0 }, { 0, 0, -11, 101, 47, -9, 0, 0 },
+ { 0, 0, -11, 93, 56, -10, 0, 0 }, { 0, 0, -12, 85, 66, -11, 0, 0 },
+ { 0, 0, -11, 75, 75, -11, 0, 0 }, { 0, 0, -11, 66, 85, -12, 0, 0 },
+ { 0, 0, -10, 56, 93, -11, 0, 0 }, { 0, 0, -9, 47, 101, -11, 0, 0 },
+ { 0, 0, -7, 37, 108, -10, 0, 0 }, { 0, 0, -6, 27, 115, -8, 0, 0 },
+ { 0, 0, -4, 18, 120, -6, 0, 0 }, { 0, 0, -2, 8, 125, -3, 0, 0 }
+};
+
+const InterpKernel *vp9_filter_kernels[5] = {
+ sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters,
+ sub_pel_filters_4
};
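Each row of the InterpKernel tables holds eight taps for one of the 16 subpel phases, and the taps in every row sum to 128 (the filters are normalized to 7 fractional bits); the new 4-tap kernel keeps zeros in the two outermost taps on each side so it can reuse the existing 8-tap convolution layout. A minimal sketch of applying one phase, assuming 8-bit pixels, enough border around src, and a hypothetical clip_u8() helper:

#include <stdint.h>

typedef int16_t InterpKernel[8];

/* Hypothetical helper: clamp to the 8-bit pixel range. */
static uint8_t clip_u8(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* Interpolate one output pixel; the taps are centered so that kernel[3]
 * lines up with the full-pel sample at src[0]. */
static uint8_t interp_pixel(const uint8_t *src, const InterpKernel kernel) {
  int k, sum = 0;
  for (k = 0; k < 8; ++k) sum += kernel[k] * src[k - 3];
  return clip_u8((sum + 64) >> 7); /* round, then divide by 128 */
}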
diff --git a/vp9/common/vp9_filter.h b/vp9/common/vp9_filter.h
index b379665b1..0382c88e7 100644
--- a/vp9/common/vp9_filter.h
+++ b/vp9/common/vp9_filter.h
@@ -25,6 +25,7 @@ extern "C" {
#define EIGHTTAP_SHARP 2
#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
#define BILINEAR 3
+#define FOURTAP 4
// The codec can operate in four possible inter prediction filter modes:
// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
@@ -32,7 +33,7 @@ extern "C" {
typedef uint8_t INTERP_FILTER;
-extern const InterpKernel *vp9_filter_kernels[4];
+extern const InterpKernel *vp9_filter_kernels[5];
#ifdef __cplusplus
} // extern "C"
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index 45d3b0f82..c5c63e476 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -259,6 +259,8 @@ typedef struct VP9Common {
PARTITION_CONTEXT *above_seg_context;
ENTROPY_CONTEXT *above_context;
int above_context_alloc_cols;
+
+ int lf_row;
} VP9_COMMON;
static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) {
diff --git a/vp9/common/vp9_thread_common.c b/vp9/common/vp9_thread_common.c
index d4b076645..36530fae6 100644
--- a/vp9/common/vp9_thread_common.c
+++ b/vp9/common/vp9_thread_common.c
@@ -229,6 +229,28 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm,
workers, num_workers, lf_sync);
}
+void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level,
+ int num_workers) {
+ const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+
+ if (!frame_filter_level) return;
+
+ if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
+ num_workers > lf_sync->num_workers) {
+ vp9_loop_filter_dealloc(lf_sync);
+ vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
+ }
+
+ // Initialize cur_sb_col to -1 for all SB rows.
+ memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
+
+ lf_sync->corrupted = 0;
+
+ memset(lf_sync->num_tiles_done, 0,
+ sizeof(*lf_sync->num_tiles_done) * sb_rows);
+ cm->lf_row = 0;
+}
+
// Set up nsync by width.
static INLINE int get_sync_range(int width) {
// nsync numbers are picked by testing. For example, for 4k
@@ -266,6 +288,25 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
pthread_cond_init(&lf_sync->cond[i], NULL);
}
}
+ pthread_mutex_init(&lf_sync->lf_mutex, NULL);
+
+ CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
+ vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
+ if (lf_sync->recon_done_mutex) {
+ int i;
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL);
+ }
+ }
+
+ CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond,
+ vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows));
+ if (lf_sync->recon_done_cond) {
+ int i;
+ for (i = 0; i < rows; ++i) {
+ pthread_cond_init(&lf_sync->recon_done_cond[i], NULL);
+ }
+ }
}
#endif // CONFIG_MULTITHREAD
@@ -276,6 +317,11 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
+ CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done,
+ vpx_malloc(sizeof(*lf_sync->num_tiles_done) *
+ mi_cols_aligned_to_sb(cm->mi_rows) >>
+ MI_BLOCK_SIZE_LOG2));
+
// Set up nsync.
lf_sync->sync_range = get_sync_range(width);
}
@@ -298,15 +344,126 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
}
vpx_free(lf_sync->cond);
}
+ if (lf_sync->recon_done_mutex != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
+ }
+ vpx_free(lf_sync->recon_done_mutex);
+ }
+
+ pthread_mutex_destroy(&lf_sync->lf_mutex);
+ if (lf_sync->recon_done_cond != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
+ }
+ vpx_free(lf_sync->recon_done_cond);
+ }
#endif // CONFIG_MULTITHREAD
+
vpx_free(lf_sync->lfdata);
vpx_free(lf_sync->cur_sb_col);
+ vpx_free(lf_sync->num_tiles_done);
// clear the structure as the source of this call may be a resize in which
// case this call will be followed by an _alloc() which may fail.
vp9_zero(*lf_sync);
}
}
+static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
+ int return_val = -1;
+ int cur_row;
+ const int max_rows = cm->mi_rows;
+
+#if CONFIG_MULTITHREAD
+ const int tile_cols = 1 << cm->log2_tile_cols;
+
+ pthread_mutex_lock(&lf_sync->lf_mutex);
+ if (cm->lf_row < max_rows) {
+ cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
+ return_val = cm->lf_row;
+ cm->lf_row += MI_BLOCK_SIZE;
+ if (cm->lf_row < max_rows) {
+ /* If this is not the last row, make sure the next row is also decoded,
+ * since intra prediction has to finish before the loop filter runs. */
+ cur_row += 1;
+ }
+ }
+ pthread_mutex_unlock(&lf_sync->lf_mutex);
+
+ if (return_val == -1) return return_val;
+
+ pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]);
+ if (lf_sync->num_tiles_done[cur_row] < tile_cols) {
+ pthread_cond_wait(&lf_sync->recon_done_cond[cur_row],
+ &lf_sync->recon_done_mutex[cur_row]);
+ }
+ pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]);
+ pthread_mutex_lock(&lf_sync->lf_mutex);
+ if (lf_sync->corrupted) {
+ return_val = -1;
+ }
+ pthread_mutex_unlock(&lf_sync->lf_mutex);
+#else
+ (void)lf_sync;
+ if (cm->lf_row < max_rows) {
+ cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
+ return_val = cm->lf_row;
+ cm->lf_row += MI_BLOCK_SIZE;
+ if (cm->lf_row < max_rows) {
+ /* If this is not the last row, make sure the next row is also decoded,
+ * since intra prediction has to finish before the loop filter runs. */
+ cur_row += 1;
+ }
+ }
+#endif // CONFIG_MULTITHREAD
+
+ return return_val;
+}
+
+void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) {
+ int mi_row;
+ VP9_COMMON *cm = lf_data->cm;
+
+ while ((mi_row = get_next_row(cm, lf_sync)) != -1 && mi_row < cm->mi_rows) {
+ lf_data->start = mi_row;
+ lf_data->stop = mi_row + MI_BLOCK_SIZE;
+
+ thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+ lf_data->start, lf_data->stop, lf_data->y_only,
+ lf_sync);
+ }
+}
+
+void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
+ int corrupted) {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(&lf_sync->lf_mutex);
+ lf_sync->corrupted |= corrupted;
+ pthread_mutex_unlock(&lf_sync->lf_mutex);
+ pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
+ lf_sync->num_tiles_done[row] += 1;
+ if (num_tiles == lf_sync->num_tiles_done[row]) {
+ if (is_last_row) {
+ /* The jobs for the last two rows both wait on the last row to be done,
+ * so the signal has to be broadcast in this case.
+ */
+ pthread_cond_broadcast(&lf_sync->recon_done_cond[row]);
+ } else {
+ pthread_cond_signal(&lf_sync->recon_done_cond[row]);
+ }
+ }
+ pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]);
+#else
+ (void)lf_sync;
+ (void)num_tiles;
+ (void)row;
+ (void)is_last_row;
+ (void)corrupted;
+#endif // CONFIG_MULTITHREAD
+}
+
// Accumulate frame counts.
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
const FRAME_COUNTS *counts, int is_dec) {
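Taken together, vp9_set_row() and get_next_row() implement a per-superblock-row producer/consumer handshake: every tile worker bumps num_tiles_done[row] after reconstructing that row, and the loopfilter side blocks until all tile columns have marked both the row it wants to filter and the row below it (intra prediction of the next row reads unfiltered pixels, so it must finish first). A condensed model of the handshake, assuming plain pthreads and one counter plus condition variable per SB row; unlike the patch, the sketch waits in a loop, which also guards against spurious wakeups:

#include <pthread.h>

typedef struct {
  pthread_mutex_t *row_mutex; /* plays the role of recon_done_mutex */
  pthread_cond_t *row_cond;   /* plays the role of recon_done_cond */
  int *tiles_done;            /* plays the role of num_tiles_done */
  int tile_cols;
} RowSync;

/* Producer (tile worker): mark one SB row of one tile column as decoded. */
static void mark_row_decoded(RowSync *s, int sb_row, int is_last_row) {
  pthread_mutex_lock(&s->row_mutex[sb_row]);
  if (++s->tiles_done[sb_row] == s->tile_cols) {
    /* The jobs for the last two rows both wait on the last row, hence the
     * broadcast there. */
    if (is_last_row)
      pthread_cond_broadcast(&s->row_cond[sb_row]);
    else
      pthread_cond_signal(&s->row_cond[sb_row]);
  }
  pthread_mutex_unlock(&s->row_mutex[sb_row]);
}

/* Consumer (loopfilter): wait until all tile columns finished the row. */
static void wait_row_decoded(RowSync *s, int sb_row) {
  pthread_mutex_lock(&s->row_mutex[sb_row]);
  while (s->tiles_done[sb_row] < s->tile_cols)
    pthread_cond_wait(&s->row_cond[sb_row], &s->row_mutex[sb_row]);
  pthread_mutex_unlock(&s->row_mutex[sb_row]);
}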
diff --git a/vp9/common/vp9_thread_common.h b/vp9/common/vp9_thread_common.h
index f92df5bd6..b97e9ee13 100644
--- a/vp9/common/vp9_thread_common.h
+++ b/vp9/common/vp9_thread_common.h
@@ -37,6 +37,14 @@ typedef struct VP9LfSyncData {
// Row-based parallel loopfilter data
LFWorkerData *lfdata;
int num_workers;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t lf_mutex;
+ pthread_mutex_t *recon_done_mutex;
+ pthread_cond_t *recon_done_cond;
+#endif
+ int *num_tiles_done;
+ int corrupted;
} VP9LfSync;
// Allocate memory for loopfilter row synchronization.
@@ -53,6 +61,17 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm,
int partial_frame, VPxWorker *workers,
int num_workers, VP9LfSync *lf_sync);
+// Multi-threaded loopfilter initialisations
+void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm,
+ int frame_filter_level, int num_workers);
+
+void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync);
+
+void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
+ int corrupted);
+
+void vp9_set_last_decoded_row(struct VP9Common *cm, int tile_col, int mi_row);
+
void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
const struct FRAME_COUNTS *counts, int is_dec);
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 48c49e2f5..95e376d04 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1451,6 +1451,25 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
return vpx_reader_find_end(&tile_data->bit_reader);
}
+static void set_rows_after_error(VP9LfSync *lf_sync, int start_row, int mi_rows,
+ int num_tiles_left, int total_num_tiles) {
+ do {
+ int mi_row;
+ const int aligned_rows = mi_cols_aligned_to_sb(mi_rows);
+ const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
+ const int corrupted = 1;
+ for (mi_row = start_row; mi_row < mi_rows; mi_row += MI_BLOCK_SIZE) {
+ const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
+ vp9_set_row(lf_sync, total_num_tiles, mi_row >> MI_BLOCK_SIZE_LOG2,
+ is_last_row, corrupted);
+ }
+ /* If there are multiple tiles, each subsequent tile starts marking row
+ * progress from row 0.
+ */
+ start_row = 0;
+ } while (num_tiles_left--);
+}
+
// On entry 'tile_data->data_end' points to the end of the input frame, on exit
// it is updated to reflect the bitreader position of the final tile column if
// present in the tile buffer group or NULL otherwise.
@@ -1461,6 +1480,12 @@ static int tile_worker_hook(void *arg1, void *arg2) {
TileInfo *volatile tile = &tile_data->xd.tile;
const int final_col = (1 << pbi->common.log2_tile_cols) - 1;
const uint8_t *volatile bit_reader_end = NULL;
+ VP9_COMMON *cm = &pbi->common;
+
+ LFWorkerData *lf_data = tile_data->lf_data;
+ VP9LfSync *lf_sync = tile_data->lf_sync;
+
+ volatile int mi_row = 0;
volatile int n = tile_data->buf_start;
tile_data->error_info.setjmp = 1;
@@ -1468,14 +1493,26 @@ static int tile_worker_hook(void *arg1, void *arg2) {
tile_data->error_info.setjmp = 0;
tile_data->xd.corrupted = 1;
tile_data->data_end = NULL;
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ const int num_tiles_left = tile_data->buf_end - n;
+ const int mi_row_start = mi_row;
+ set_rows_after_error(lf_sync, mi_row_start, cm->mi_rows, num_tiles_left,
+ 1 << cm->log2_tile_cols);
+ }
return 0;
}
tile_data->xd.corrupted = 0;
do {
- int mi_row, mi_col;
+ int mi_col;
const TileBuffer *const buf = pbi->tile_buffers + n;
+
+ /* Initializing to 0 is safe since we do not deal with streams that have
+ * more than one row of tiles (so tile->mi_row_start will be 0).
+ */
+ assert(cm->log2_tile_rows == 0);
+ mi_row = 0;
vp9_zero(tile_data->dqcoeff);
vp9_tile_init(tile, &pbi->common, 0, buf->col);
setup_token_decoder(buf->data, tile_data->data_end, buf->size,
@@ -1493,6 +1530,14 @@ static int tile_worker_hook(void *arg1, void *arg2) {
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
}
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+ const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
+ const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
+ vp9_set_row(lf_sync, 1 << cm->log2_tile_cols,
+ mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row,
+ tile_data->xd.corrupted);
+ }
}
if (buf->col == final_col) {
@@ -1500,6 +1545,21 @@ static int tile_worker_hook(void *arg1, void *arg2) {
}
} while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end);
+ if (pbi->lpf_mt_opt && n < tile_data->buf_end && cm->lf.filter_level &&
+ !cm->skip_loop_filter) {
+ /* n was not incremented in the tile loop, so increment it before
+ * computing the number of tiles left.
+ */
+ ++n;
+ set_rows_after_error(lf_sync, 0, cm->mi_rows, tile_data->buf_end - n,
+ 1 << cm->log2_tile_cols);
+ }
+
+ if (pbi->lpf_mt_opt && !tile_data->xd.corrupted && cm->lf.filter_level &&
+ !cm->skip_loop_filter) {
+ vp9_loopfilter_rows(lf_data, lf_sync);
+ }
+
tile_data->data_end = bit_reader_end;
return !tile_data->xd.corrupted;
}
@@ -1516,6 +1576,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data,
VP9_COMMON *const cm = &pbi->common;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
const uint8_t *bit_reader_end = NULL;
+ VP9LfSync *lf_row_sync = &pbi->lf_row_sync;
+ YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
@@ -1542,12 +1604,26 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data,
}
}
+ // Initialize LPF
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level,
+ pbi->num_tile_workers);
+ }
+
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
TileWorkerData *const tile_data =
&pbi->tile_worker_data[n + pbi->total_tiles];
winterface->sync(worker);
+
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ tile_data->lf_sync = lf_row_sync;
+ tile_data->lf_data = &tile_data->lf_sync->lfdata[n];
+ vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane);
+ tile_data->lf_data->y_only = 0;
+ }
+
tile_data->xd = pbi->mb;
tile_data->xd.counts =
cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
@@ -2069,17 +2145,19 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data,
if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
// Multi-threaded tile decoder
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
- if (!xd->corrupted) {
- if (!cm->skip_loop_filter) {
- // If multiple threads are used to decode tiles, then we use those
- // threads to do parallel loopfiltering.
- vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
- 0, 0, pbi->tile_workers, pbi->num_tile_workers,
- &pbi->lf_row_sync);
+ if (!pbi->lpf_mt_opt) {
+ if (!xd->corrupted) {
+ if (!cm->skip_loop_filter) {
+ // If multiple threads are used to decode tiles, then we use those
+ // threads to do parallel loopfiltering.
+ vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane,
+ cm->lf.filter_level, 0, 0, pbi->tile_workers,
+ pbi->num_tile_workers, &pbi->lf_row_sync);
+ }
+ } else {
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Decode failed. Frame data is corrupted.");
}
- } else {
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Decode failed. Frame data is corrupted.");
}
} else {
*p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
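The in-worker loopfilter path is gated by the same three conditions at every call site in the hook above; a tiny helper (hypothetical, not part of the patch, and assuming the decoder headers) makes the predicate explicit:

/* Loopfilter inside the tile workers only when the optimization is enabled,
 * the frame has a nonzero filter level, and the application has not asked
 * to skip in-loop filtering. */
static int lpf_runs_in_tile_workers(const VP9Decoder *pbi,
                                    const VP9_COMMON *cm) {
  return pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter;
}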
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 1c488961a..425c8964c 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -37,6 +37,8 @@ typedef struct TileWorkerData {
int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive
vpx_reader bit_reader;
FRAME_COUNTS counts;
+ LFWorkerData *lf_data;
+ VP9LfSync *lf_sync;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
@@ -74,6 +76,7 @@ typedef struct VP9Decoder {
int hold_ref_buf; // hold the reference buffer.
int row_mt;
+ int lpf_mt_opt;
} VP9Decoder;
int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size,
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 6ea264f09..563fdbbde 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -208,6 +208,11 @@ struct macroblock {
void (*highbd_inv_txfm_add)(const tran_low_t *input, uint16_t *dest,
int stride, int eob, int bd);
#endif
+#if CONFIG_ML_VAR_PARTITION
+ DECLARE_ALIGNED(16, uint8_t, est_pred[64 * 64]);
+#endif // CONFIG_ML_VAR_PARTITION
+
+ struct scale_factors *me_sf;
};
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f1527f930..72dc13797 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3393,6 +3393,139 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x,
#undef FEATURES
#undef LABELS
+// Use a neural net model to prune partition-none and partition-split search.
+// The model uses prediction residue variance and quantization step size as
+// input features.
+#define FEATURES 6
+static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int mi_row,
+ int mi_col, int *none, int *split) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ MODE_INFO *mi = xd->mi[0];
+ const NN_CONFIG *nn_config = NULL;
+ DECLARE_ALIGNED(16, uint8_t, pred_buf[64 * 64]);
+ const int speed = cpi->oxcf.speed;
+ int i;
+ float thresh = 0.0f;
+
+ switch (bsize) {
+ case BLOCK_64X64:
+ nn_config = &vp9_var_rd_part_nnconfig_64;
+ thresh = speed > 0 ? 3.5f : 3.0f;
+ break;
+ case BLOCK_32X32:
+ nn_config = &vp9_var_rd_part_nnconfig_32;
+ thresh = speed > 0 ? 3.5f : 3.0f;
+ break;
+ case BLOCK_16X16:
+ nn_config = &vp9_var_rd_part_nnconfig_16;
+ thresh = speed > 0 ? 3.5f : 4.0f;
+ break;
+ case BLOCK_8X8:
+ nn_config = &vp9_var_rd_part_nnconfig_8;
+ if (cm->width >= 720 && cm->height >= 720)
+ thresh = speed > 0 ? 2.5f : 2.0f;
+ else
+ thresh = speed > 0 ? 3.5f : 2.0f;
+ break;
+ default: assert(0 && "Unexpected block size."); return;
+ }
+
+ if (!nn_config) return;
+
+ mi->ref_frame[1] = NONE;
+ mi->sb_type = bsize;
+ // Do a simple single motion search to find a prediction for current block.
+ // The variance of the residue will be used as input features.
+ {
+ const MV_REFERENCE_FRAME ref =
+ cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
+ YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref);
+ MV ref_mv = { 0, 0 };
+ MV ref_mv_full = { 0, 0 };
+ const int step_param = 1;
+ const MvLimits tmp_mv_limits = x->mv_limits;
+ const SEARCH_METHODS search_method = NSTEP;
+ const int sadpb = x->sadperbit16;
+ MV best_mv = { 0, 0 };
+ int cost_list[5];
+
+ assert(yv12 != NULL);
+ if (!yv12) return;
+ vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[ref - 1].sf);
+ mi->ref_frame[0] = ref;
+ vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
+ vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param,
+ search_method, sadpb, cond_cost_list(cpi, cost_list),
+ &ref_mv, &best_mv, 0, 0);
+ best_mv.row *= 8;
+ best_mv.col *= 8;
+ x->mv_limits = tmp_mv_limits;
+ mi->mv[0].as_mv = best_mv;
+
+ set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+ xd->plane[0].dst.buf = pred_buf;
+ xd->plane[0].dst.stride = 64;
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ }
+
+ vpx_clear_system_state();
+
+ {
+ float features[FEATURES] = { 0.0f };
+ const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+ int feature_idx = 0;
+ float score;
+
+ // Generate model input features.
+ features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+ vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+ // Get the variance of the residue as input features.
+ {
+ const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+ const uint8_t *pred = pred_buf;
+ const uint8_t *src = x->plane[0].src.buf;
+ const int src_stride = x->plane[0].src.stride;
+ const int pred_stride = 64;
+ unsigned int sse;
+ // Variance of whole block.
+ const unsigned int var =
+ cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
+ const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
+
+ features[feature_idx++] = logf((float)var + 1.0f);
+ for (i = 0; i < 4; ++i) {
+ const int x_idx = (i & 1) * bs / 2;
+ const int y_idx = (i >> 1) * bs / 2;
+ const int src_offset = y_idx * src_stride + x_idx;
+ const int pred_offset = y_idx * pred_stride + x_idx;
+ // Variance of quarter block.
+ const unsigned int sub_var =
+ cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
+ pred + pred_offset, pred_stride, &sse);
+ const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
+ features[feature_idx++] = var_ratio;
+ }
+ }
+ assert(feature_idx == FEATURES);
+
+ // Feed the features into the model to get the confidence score.
+ nn_predict(features, nn_config, &score);
+
+ // Higher score means that the model has higher confidence that the split
+ // partition is better than the non-split partition. So if the score is
+ // high enough, we skip the non-split partition search; if the score is
+ // low enough, we skip the split partition search.
+ if (score > thresh) *none = 0;
+ if (score < -thresh) *split = 0;
+ }
+}
+#undef FEATURES
+#undef LABELS
+
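The six features have a fixed layout: log of the squared DC quantizer step, log of the whole-block prediction-residue variance, then the four quarter-block variances expressed as ratios to the whole-block variance. A standalone sketch of the feature builder, assuming plain 8-bit buffers; block_variance() is a hypothetical stand-in for the cpi->fn_ptr[bsize].vf variance pointers:

#include <math.h>
#include <stdint.h>

static unsigned block_variance(const uint8_t *src, int src_stride,
                               const uint8_t *pred, int pred_stride, int w,
                               int h) {
  int64_t sum = 0, sse = 0;
  int r, c;
  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c) {
      const int d = src[r * src_stride + c] - pred[r * pred_stride + c];
      sum += d;
      sse += (int64_t)d * d;
    }
  }
  /* Unnormalized variance, sse - sum^2 / n, matching the vpx convention. */
  return (unsigned)(sse - (sum * sum) / (w * h));
}

static void build_var_rd_features(const uint8_t *src, int src_stride,
                                  const uint8_t *pred, int pred_stride,
                                  int bs, int dc_q, float features[6]) {
  const unsigned var =
      block_variance(src, src_stride, pred, pred_stride, bs, bs);
  const float factor = (var == 0) ? 1.0f : 1.0f / (float)var;
  int i;
  features[0] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
  features[1] = logf((float)var + 1.0f);
  for (i = 0; i < 4; ++i) {
    const int x = (i & 1) * bs / 2;
    const int y = (i >> 1) * bs / 2;
    const unsigned sub_var =
        block_variance(src + y * src_stride + x, src_stride,
                       pred + y * pred_stride + x, pred_stride, bs / 2,
                       bs / 2);
    features[2 + i] = (var == 0) ? 1.0f : factor * (float)sub_var;
  }
}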
int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col,
int orig_rdmult) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
@@ -3624,6 +3757,21 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
pc_tree->partitioning = PARTITION_NONE;
+ if (cpi->sf.ml_var_partition_pruning) {
+ int do_ml_var_partition_pruning =
+ !frame_is_intra_only(cm) && partition_none_allowed && do_split &&
+ mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows &&
+ mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ do_ml_var_partition_pruning = 0;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (do_ml_var_partition_pruning) {
+ ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col,
+ &partition_none_allowed, &do_split);
+ }
+ }
+
// PARTITION_NONE
if (partition_none_allowed) {
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
@@ -3738,6 +3886,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ } else {
+ vp9_zero(ctx->pred_mv);
+ ctx->mic.interp_filter = EIGHTTAP;
}
// store estimated motion vector
@@ -4345,6 +4496,83 @@ static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
}
}
+#if CONFIG_ML_VAR_PARTITION
+#define FEATURES 6
+#define LABELS 2
+static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int mi_row,
+ int mi_col) {
+ VP9_COMMON *const cm = &cpi->common;
+ const NN_CONFIG *nn_config = NULL;
+ float thresh_low = -0.2f;
+ float thresh_high = 0.0f;
+
+ switch (bsize) {
+ case BLOCK_64X64:
+ nn_config = &vp9_var_part_nnconfig_64;
+ thresh_low = -0.3f;
+ thresh_high = -0.1f;
+ break;
+ case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break;
+ case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break;
+ case BLOCK_8X8: break;
+ default: assert(0 && "Unexpected block size."); return -1;
+ }
+
+ if (!nn_config) return -1;
+
+ vpx_clear_system_state();
+
+ {
+ float features[FEATURES] = { 0.0f };
+ const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+ int feature_idx = 0;
+ float score[LABELS];
+
+ features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+ vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+ {
+ const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+ const int sb_offset_row = 8 * (mi_row & 7);
+ const int sb_offset_col = 8 * (mi_col & 7);
+ const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
+ const uint8_t *src = x->plane[0].src.buf;
+ const int src_stride = x->plane[0].src.stride;
+ const int pred_stride = 64;
+ unsigned int sse;
+ int i;
+ // Variance of whole block.
+ const unsigned int var =
+ cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
+ const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
+
+ features[feature_idx++] = logf((float)var + 1.0f);
+ for (i = 0; i < 4; ++i) {
+ const int x_idx = (i & 1) * bs / 2;
+ const int y_idx = (i >> 1) * bs / 2;
+ const int src_offset = y_idx * src_stride + x_idx;
+ const int pred_offset = y_idx * pred_stride + x_idx;
+ // Variance of quarter block.
+ const unsigned int sub_var =
+ cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
+ pred + pred_offset, pred_stride, &sse);
+ const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
+ features[feature_idx++] = var_ratio;
+ }
+ }
+
+ assert(feature_idx == FEATURES);
+ nn_predict(features, nn_config, score);
+ if (score[0] > thresh_high) return 3;
+ if (score[0] < thresh_low) return 0;
+ return -1;
+ }
+}
+#undef FEATURES
+#undef LABELS
+#endif // CONFIG_ML_VAR_PARTITION
+
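The tri-state return value is consumed a little further down in nonrd_pick_partition(): 3 means the model is confident a split is better, 0 means it is confident PARTITION_NONE suffices, and -1 means it abstains. A sketch of that gating, using hypothetical names:

static void apply_ml_partition_hint(int hint, int *partition_none_allowed,
                                    int *do_split) {
  if (hint == 0) *do_split = 0;               /* confident: stop at NONE */
  if (hint == 3) *partition_none_allowed = 0; /* confident: must split */
  /* hint == -1: the model abstains; keep both candidates in play. */
}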
static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -4374,6 +4602,11 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
!force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
int partition_vert_allowed =
!force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
+#if CONFIG_ML_VAR_PARTITION
+ const int use_ml_based_partitioning =
+ sf->partition_search_type == ML_BASED_PARTITION;
+#endif // CONFIG_ML_VAR_PARTITION
+
(void)*tp_orig;
// Avoid checking for rectangular partitions for speed >= 6.
@@ -4404,6 +4637,20 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
partition_vert_allowed &= force_vert_split;
}
+#if CONFIG_ML_VAR_PARTITION
+ if (use_ml_based_partitioning) {
+ if (partition_none_allowed || do_split) do_rect = 0;
+ if (partition_none_allowed && do_split) {
+ const int ml_predicted_partition =
+ ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col);
+ if (ml_predicted_partition == 0) do_split = 0;
+ if (ml_predicted_partition == 3) partition_none_allowed = 0;
+ }
+ }
+#endif // CONFIG_ML_VAR_PARTITION
+
+ if (!partition_none_allowed && !do_split) do_rect = 1;
+
ctx->pred_pixel_ready =
!(partition_vert_allowed || partition_horz_allowed || do_split);
@@ -4417,26 +4664,28 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
ctx->skip = x->skip;
if (this_rdc.rate != INT_MAX) {
- int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
this_rdc.rdcost =
RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
if (this_rdc.rdcost < best_rdc.rdcost) {
- int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
- int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
-
- dist_breakout_thr >>=
- 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
-
- rate_breakout_thr *= num_pels_log2_lookup[bsize];
-
best_rdc = this_rdc;
if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
- if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
- this_rdc.dist < dist_breakout_thr) {
- do_split = 0;
- do_rect = 0;
+#if CONFIG_ML_VAR_PARTITION
+ if (!use_ml_based_partitioning)
+#endif // CONFIG_ML_VAR_PARTITION
+ {
+ int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
+ int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
+ dist_breakout_thr >>=
+ 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+ rate_breakout_thr *= num_pels_log2_lookup[bsize];
+ if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
+ this_rdc.dist < dist_breakout_thr) {
+ do_split = 0;
+ do_rect = 0;
+ }
}
}
}
@@ -4835,6 +5084,111 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
+#if CONFIG_ML_VAR_PARTITION
+// Get a prediction (stored in x->est_pred) for the whole 64x64 superblock.
+static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile,
+ MACROBLOCK *x, int mi_row, int mi_col) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int is_key_frame = frame_is_intra_only(cm);
+
+ set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
+
+ if (!is_key_frame) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MODE_INFO *mi = xd->mi[0];
+ YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+ const YV12_BUFFER_CONFIG *yv12_g = NULL;
+ const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
+ (mi_row + 4 < cm->mi_rows);
+ int pixels_wide = 64, pixels_high = 64;
+ unsigned int y_sad_g, y_sad_thr;
+ unsigned int y_sad = UINT_MAX;
+
+ assert(yv12 != NULL);
+
+ if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
+ if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
+
+ if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
+ cpi->svc.use_gf_temporal_ref_current_layer) {
+ // For now, GOLDEN will not be used for non-zero spatial layers, since
+ // it may not be a temporal reference.
+ yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ }
+
+ // Only compute y_sad_g (sad for golden reference) for speed < 8.
+ if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
+ (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+ vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+ &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+ y_sad_g = cpi->fn_ptr[bsize].sdf(
+ x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
+ } else {
+ y_sad_g = UINT_MAX;
+ }
+
+ if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+ cpi->rc.is_src_frame_alt_ref) {
+ yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
+ vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[ALTREF_FRAME - 1].sf);
+ mi->ref_frame[0] = ALTREF_FRAME;
+ y_sad_g = UINT_MAX;
+ } else {
+ vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[LAST_FRAME - 1].sf);
+ mi->ref_frame[0] = LAST_FRAME;
+ }
+ mi->ref_frame[1] = NONE;
+ mi->sb_type = BLOCK_64X64;
+ mi->mv[0].as_int = 0;
+ mi->interp_filter = BILINEAR;
+
+ {
+ const MV dummy_mv = { 0, 0 };
+ y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
+ &dummy_mv);
+ x->sb_use_mv_part = 1;
+ x->sb_mvcol_part = mi->mv[0].as_mv.col;
+ x->sb_mvrow_part = mi->mv[0].as_mv.row;
+ }
+
+ // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad
+ // are close if short_circuit_low_temp_var is on.
+ y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
+ if (y_sad_g < y_sad_thr) {
+ vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+ &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+ mi->ref_frame[0] = GOLDEN_FRAME;
+ mi->mv[0].as_int = 0;
+ y_sad = y_sad_g;
+ } else {
+ x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+ }
+
+ set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+ xd->plane[0].dst.buf = x->est_pred;
+ xd->plane[0].dst.stride = 64;
+ vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
+ } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (xd->bd) {
+ case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
+ case 10:
+ memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
+ break;
+ case 12:
+ memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
+ break;
+ }
+#else
+ memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+}
+#endif // CONFIG_ML_VAR_PARTITION
+
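On key frames there is no reference to predict from, so the estimate falls back to flat mid-gray at the coding bit depth, 128 << (bd - 8), i.e. 128, 512, or 2048. Note that memset() stores a single byte pattern; a genuinely 16-bit estimate buffer would need an explicit fill, sketched here under that assumption (est_pred itself is declared uint8_t in this patch):

#include <stdint.h>

/* Fill an estimate buffer with mid-gray at bit depth bd (8, 10, or 12). */
static void fill_neutral16(uint16_t *dst, int n, int bd) {
  const uint16_t mid = (uint16_t)(128 << (bd - 8)); /* 128, 512, 2048 */
  int i;
  for (i = 0; i < n; ++i) dst[i] = mid;
}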
static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, int mi_row,
TOKENEXTRA **tp) {
@@ -4926,6 +5280,17 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
break;
+#if CONFIG_ML_VAR_PARTITION
+ case ML_BASED_PARTITION:
+ get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
+ x->max_partition_size = BLOCK_64X64;
+ x->min_partition_size = BLOCK_8X8;
+ x->sb_pickmode_part = 1;
+ nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
+ BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
+ td->pc_root);
+ break;
+#endif // CONFIG_ML_VAR_PARTITION
case SOURCE_VAR_BASED_PARTITION:
set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index c6e9fc840..c10d010a4 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2449,6 +2449,17 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
vp9_loop_filter_init(cm);
+ // Set up the unit scaling factor used during motion search.
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
+ cm->width, cm->height,
+ cm->use_highbitdepth);
+#else
+ vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
+ cm->width, cm->height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ cpi->td.mb.me_sf = &cpi->me_sf;
+
cm->error.setjmp = 0;
return cpi;
@@ -3058,6 +3069,12 @@ void update_ref_frames(VP9_COMP *cpi) {
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
}
+
+ if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) {
+ cpi->alt_fb_idx =
+ stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
+ --gf_group->stack_size;
+ }
}
void vp9_update_reference_frames(VP9_COMP *cpi) {
@@ -3211,8 +3228,8 @@ void vp9_scale_references(VP9_COMP *cpi) {
if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
// Check for release of scaled reference.
buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
- buf = (buf_idx != INVALID_IDX) ? &pool->frame_bufs[buf_idx] : NULL;
- if (buf != NULL) {
+ if (buf_idx != INVALID_IDX) {
+ buf = &pool->frame_bufs[buf_idx];
--buf->ref_count;
cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
}
@@ -3243,22 +3260,21 @@ static void release_scaled_references(VP9_COMP *cpi) {
refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
const int idx = cpi->scaled_ref_idx[i - 1];
- RefCntBuffer *const buf =
- idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL;
- const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
- if (buf != NULL &&
- (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
- buf->buf.y_crop_height == ref->y_crop_height))) {
- --buf->ref_count;
- cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
+ if (idx != INVALID_IDX) {
+ RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
+ const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
+ if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
+ buf->buf.y_crop_height == ref->y_crop_height)) {
+ --buf->ref_count;
+ cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
+ }
}
}
} else {
for (i = 0; i < MAX_REF_FRAMES; ++i) {
const int idx = cpi->scaled_ref_idx[i];
- RefCntBuffer *const buf =
- idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL;
- if (buf != NULL) {
+ if (idx != INVALID_IDX) {
+ RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
--buf->ref_count;
cpi->scaled_ref_idx[i] = INVALID_IDX;
}
@@ -4904,6 +4920,8 @@ static void init_ref_frame_bufs(VP9_COMMON *cm) {
cm->new_fb_idx = INVALID_IDX;
for (i = 0; i < REF_FRAMES; ++i) {
cm->ref_frame_map[i] = INVALID_IDX;
+ }
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
pool->frame_bufs[i].ref_count = 0;
}
}
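The loop split fixes an initialization-coverage gap: ref_frame_map has REF_FRAMES entries, while the pool's ref_count spans all FRAME_BUFFERS entries (the pool is larger, since it also holds in-flight and scratch buffers), so clearing ref_count inside the REF_FRAMES loop left the tail of the pool untouched. A reduced sketch with illustrative sizes:

enum { kRefFrames = 8, kFrameBuffers = 15, kInvalidIdx = -1 }; /* illustrative */

static void init_ref_frame_bufs_sketch(int ref_frame_map[kRefFrames],
                                       int ref_count[kFrameBuffers]) {
  int i;
  for (i = 0; i < kRefFrames; ++i) ref_frame_map[i] = kInvalidIdx;
  /* Clear reference counts for the whole pool, not just the mapped refs. */
  for (i = 0; i < kFrameBuffers; ++i) ref_count[i] = 0;
}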
@@ -5539,12 +5557,13 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
/* restore UMV window */
x->mv_limits = tmp_mv_limits;
+ // TODO(yunqing): may use higher tap interp filter than 2 taps.
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(
x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
- cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
- 0);
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
+ USE_2_TAPS);
return bestsme;
}
@@ -6498,11 +6517,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->summedp_quality += frame_ssim2 * weight;
cpi->summedp_weights += weight;
#if 0
- {
+ if (cm->show_frame) {
FILE *f = fopen("q_used.stt", "a");
fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
- cpi->common.current_video_frame, y2, u2, v2,
- frame_psnr2, frame_ssim2);
+ cpi->common.current_video_frame, psnr2.psnr[1],
+ psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2);
fclose(f);
}
#endif
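me_sf is built with identical source and destination dimensions, so it is a unit (identity) scale factor; in the Q4 fixed-point convention used by the scaler that means a per-pixel step of 16, which is exactly the invariant accurate_sub_pel_search() asserts via sf->x_step_q4 == 16. A one-function sketch of the step computation under that convention:

/* Q4 step between output pixels; equal sizes give 16, i.e. no scaling. */
static int step_q4_sketch(int other_size, int this_size) {
  return (16 * other_size) / this_size;
}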
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 25396bc7a..a9f7daf07 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -605,6 +605,7 @@ typedef struct VP9_COMP {
ActiveMap active_map;
fractional_mv_step_fp *find_fractional_mv_step;
+ struct scale_factors me_sf;
vp9_diamond_search_fn_t diamond_search_sad;
vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
uint64_t time_receive_data;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 38e98cd1e..58c3a435d 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -2085,18 +2085,6 @@ static int calculate_boost_bits(int frame_count, int boost,
0);
}
-// Current limit on maximum number of active arfs in a GF/ARF group.
-#define MAX_ACTIVE_ARFS 2
-#define ARF_SLOT1 2
-#define ARF_SLOT2 3
-// This function indirects the choice of buffers for arfs.
-// At the moment the values are fixed but this may change as part of
-// the integration process with other codec features that swap buffers around.
-static void get_arf_buffer_indices(unsigned char *arf_buffer_indices) {
- arf_buffer_indices[0] = ARF_SLOT1;
- arf_buffer_indices[1] = ARF_SLOT2;
-}
-
// Used in corpus vbr: Calculates the total normalized group complexity score
// for a given number of frames starting at the current position in the stats
// file.
@@ -2172,6 +2160,20 @@ static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group,
find_arf_order(cpi, gf_group, index_counter, depth + 1, mid + 1, end);
}
+static INLINE void set_gf_overlay_frame_type(GF_GROUP *gf_group,
+ int frame_index,
+ int source_alt_ref_active) {
+ if (source_alt_ref_active) {
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1;
+ } else {
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ gf_group->layer_depth[frame_index] = 0;
+ }
+}
+
static int define_gf_group_structure(VP9_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
@@ -2179,7 +2181,6 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
int i;
int frame_index = 0;
int key_frame;
- unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
int normal_frames;
key_frame = cpi->common.frame_type == KEY_FRAME;
@@ -2187,24 +2188,11 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
gf_group->frame_start = cpi->common.current_video_frame;
gf_group->frame_end = gf_group->frame_start + rc->baseline_gf_interval - 1;
- get_arf_buffer_indices(arf_buffer_indices);
-
// For key frames the frame target rate is already set and it
// is also the golden frame.
// === [frame_index == 0] ===
- if (!key_frame) {
- if (rc->source_alt_ref_active) {
- gf_group->update_type[frame_index] = OVERLAY_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1;
- } else {
- gf_group->update_type[frame_index] = GF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_STD;
- gf_group->layer_depth[frame_index] = 0;
- }
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
- }
+ if (!key_frame)
+ set_gf_overlay_frame_type(gf_group, frame_index, rc->source_alt_ref_active);
++frame_index;
@@ -2213,12 +2201,8 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
gf_group->update_type[frame_index] = ARF_UPDATE;
gf_group->rf_level[frame_index] = GF_ARF_STD;
gf_group->layer_depth[frame_index] = 1;
-
gf_group->arf_src_offset[frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1);
-
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
++frame_index;
}
@@ -2226,13 +2210,9 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
find_arf_order(cpi, gf_group, &frame_index, 2, 0,
rc->baseline_gf_interval - 1);
- if (rc->source_alt_ref_pending) {
- gf_group->update_type[frame_index] = OVERLAY_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- } else {
- gf_group->update_type[frame_index] = GF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_STD;
- }
+ set_gf_overlay_frame_type(gf_group, frame_index,
+ rc->source_alt_ref_pending);
+
gf_group->arf_src_offset[frame_index] = 0;
return frame_index;
@@ -2242,12 +2222,8 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
for (i = 0; i < normal_frames; ++i) {
- int arf_idx = 0;
if (twopass->stats_in >= twopass->stats_in_end) break;
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
- gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
-
gf_group->update_type[frame_index] = LF_UPDATE;
gf_group->rf_level[frame_index] = INTER_NORMAL;
gf_group->arf_src_offset[frame_index] = 0;
@@ -2260,8 +2236,8 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
// We need to configure the frame at the end of the sequence + 1 that will be
// the start frame for the next group. Otherwise prior to the call to
// vp9_rc_get_second_pass_params() the data will be undefined.
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
+
+ set_gf_overlay_frame_type(gf_group, frame_index, rc->source_alt_ref_pending);
if (rc->source_alt_ref_pending) {
gf_group->update_type[frame_index] = OVERLAY_UPDATE;
@@ -2383,9 +2359,6 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
}
gf_group->bit_allocation[idx] = 0;
- for (idx = 0; idx < gop_frames; ++idx)
- if (gf_group->update_type[idx] == LF_UPDATE) break;
-
return;
}
@@ -2485,6 +2458,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const int is_key_frame = frame_is_intra_only(cm);
const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
+ double gop_intra_factor = 1.0;
+
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
if (is_key_frame == 0) {
@@ -2524,8 +2499,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
{
int int_max_q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
cpi->common.bit_depth));
- int int_lbq = (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex,
- cpi->common.bit_depth));
+ int q_term = (cm->current_video_frame == 0)
+ ? int_max_q / 32
+ : (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex,
+ cpi->common.bit_depth) /
+ 6);
active_min_gf_interval =
rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
active_min_gf_interval =
@@ -2535,7 +2513,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// bits to spare and are better with a smaller interval and smaller boost.
// At high Q when there are few bits to spare we are better with a longer
// interval to spread the cost of the GF.
- active_max_gf_interval = 12 + arf_active_or_kf + VPXMIN(4, (int_lbq / 6));
+ active_max_gf_interval = 11 + arf_active_or_kf + VPXMIN(5, q_term);
+
+ // Force max GF interval to be odd.
+ active_max_gf_interval = active_max_gf_interval | 0x01;
// We have: active_min_gf_interval <=
// rc->max_gf_interval + arf_active_or_kf.
@@ -2552,6 +2533,17 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
active_max_gf_interval = rc->frames_to_key / 2;
}
+ if (cpi->multi_layer_arf) {
+ int layers = 0;
+ int max_layers = VPXMIN(MAX_ARF_LAYERS, cpi->oxcf.enable_auto_arf);
+
+ // Adapt the intra_error factor to active_max_gf_interval limit.
+ for (i = active_max_gf_interval; i > 0; i >>= 1) ++layers;
+
+ layers = VPXMIN(max_layers, layers);
+ gop_intra_factor += (layers * 0.25);
+ }
+
i = 0;
while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) {
++i;
@@ -2624,11 +2616,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Don't break out with a very short interval.
(i >= active_min_gf_interval) &&
// If possible dont break very close to a kf
- ((rc->frames_to_key - i) >= rc->min_gf_interval) &&
+ ((rc->frames_to_key - i) >= rc->min_gf_interval) && (i & 0x01) &&
(!flash_detected) &&
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
(abs_mv_in_out_accumulator > abs_mv_in_out_thresh) ||
- (sr_accumulator > next_frame.intra_error)))) {
+ (sr_accumulator > gop_intra_factor * next_frame.intra_error)))) {
break;
}
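Two related tweaks above: active_max_gf_interval is forced odd (the | 0x01) and the GF-group break-out now only triggers at odd i, which keeps the multi-layer ARF pyramid evenly splittable; and the break-out threshold on sr_accumulator is relaxed by gop_intra_factor, which grows by 0.25 per pyramid layer. A sketch of the factor, assuming the same shift-loop layer count (floor(log2(interval)) + 1, clamped):

static double gop_intra_factor_for(int active_max_gf_interval,
                                   int max_layers) {
  int layers = 0, i;
  /* Halvings needed to exhaust the interval: floor(log2(n)) + 1. */
  for (i = active_max_gf_interval; i > 0; i >>= 1) ++layers;
  if (layers > max_layers) layers = max_layers;
  return 1.0 + layers * 0.25; /* e.g. interval 13 -> 4 layers -> factor 2.0 */
}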
diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h
index da1c61c49..9bd0a9e04 100644
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -43,12 +43,6 @@ typedef struct {
#define INVALID_ROW -1
-// Length of the bi-predictive frame group (BFG)
-// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain
-// number of bi-predictive frames.
-#define BFG_INTERVAL 2
-#define MAX_EXT_ARFS 2
-#define MIN_EXT_ARF_INTERVAL 4
#define MAX_ARF_LAYERS 6
typedef struct {
@@ -117,8 +111,9 @@ typedef enum {
GF_UPDATE = 2,
ARF_UPDATE = 3,
OVERLAY_UPDATE = 4,
- USE_BUF_FRAME = 5, // Use show existing frame, no ref buffer update
- FRAME_UPDATE_TYPES = 6
+ MID_OVERLAY_UPDATE = 5,
+ USE_BUF_FRAME = 6, // Use show existing frame, no ref buffer update
+ FRAME_UPDATE_TYPES = 7
} FRAME_UPDATE_TYPE;
#define FC_ANIMATION_THRESH 0.15
@@ -134,10 +129,6 @@ typedef struct {
FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 2];
unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2];
unsigned char layer_depth[MAX_STATIC_GF_GROUP_LENGTH + 2];
- unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 2];
- unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 2];
- unsigned char brf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2];
- unsigned char bidir_pred_enabled[MAX_STATIC_GF_GROUP_LENGTH + 2];
int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 2];
int gfu_boost[MAX_STATIC_GF_GROUP_LENGTH + 2];
@@ -203,7 +194,6 @@ struct ThreadData;
struct TileDataEnc;
void vp9_init_first_pass(struct VP9_COMP *cpi);
-void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi);
void vp9_first_pass(struct VP9_COMP *cpi, const struct lookahead_entry *source);
void vp9_end_first_pass(struct VP9_COMP *cpi);
@@ -222,17 +212,6 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width,
int *scaled_frame_height);
-static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) {
- assert(MAX_EXT_ARFS > 0);
- if (arf_pending) {
- if (interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1))
- return MAX_EXT_ARFS;
- else if (interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS)
- return MAX_EXT_ARFS - 1;
- }
- return 0;
-}
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 2ec048b53..831c79c17 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -57,11 +57,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv,
{
uint32_t distortion;
uint32_t sse;
+ // TODO(yunqing): may use higher tap interp filter than 2 taps if needed.
cpi->find_fractional_mv_step(
x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&v_fn_ptr, 0, mv_sf->subpel_search_level,
cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
- 0);
+ 0, USE_2_TAPS);
}
xd->mi[0]->mode = NEWMV;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 995c54fc7..0f9051bb7 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -367,14 +367,12 @@ static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
*ir = (int)divide_and_round(x1 * b, y1);
}
-uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
- const MV *ref_mv, int allow_hp,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- uint32_t *distortion, uint32_t *sse1,
- const uint8_t *second_pred, int w, int h) {
+uint32_t vp9_skip_sub_pixel_tree(
+ const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
+ int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
+ int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
+ uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
@@ -397,6 +395,7 @@ uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
(void)sse;
(void)thismse;
(void)cost_list;
+ (void)use_accurate_subpel_search;
return besterr;
}
@@ -406,7 +405,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
@@ -418,6 +417,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
(void)allow_hp;
(void)forced_stop;
(void)hstep;
+ (void)use_accurate_subpel_search;
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
@@ -471,8 +471,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
+ (void)use_accurate_subpel_search;
+
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
offset, mvjcost, mvcost, sse1, distortion);
@@ -531,8 +533,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
+ (void)use_accurate_subpel_search;
+
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
offset, mvjcost, mvcost, sse1, distortion);
@@ -617,12 +621,119 @@ static const MV search_step_table[12] = {
};
/* clang-format on */
+static int accurate_sub_pel_search(
+ const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf,
+ const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp,
+ const uint8_t *const src_address, const int src_stride,
+ const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred,
+ int w, int h, uint32_t *sse) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint64_t besterr;
+ assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
+ assert(w != 0 && h != 0);
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
+ vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride,
+ pred16, w, this_mv, sf, w, h, 0, kernel,
+ MV_PRECISION_Q3, 0, 0, xd->bd);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
+ vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
+ h, pred16, w);
+ besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address,
+ src_stride, sse);
+ } else {
+ besterr =
+ vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse);
+ }
+ } else {
+ DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+ vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
+ 0, kernel, MV_PRECISION_Q3, 0, 0);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
+ besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
+ } else {
+ besterr = vfp->vf(pred, w, src_address, src_stride, sse);
+ }
+ }
+ if (besterr >= UINT_MAX) return UINT_MAX;
+ return (int)besterr;
+#else
+ int besterr;
+ DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+ assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
+ assert(w != 0 && h != 0);
+ (void)xd;
+
+ vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
+ 0, kernel, MV_PRECISION_Q3, 0, 0);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
+ besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
+ } else {
+ besterr = vfp->vf(pred, w, src_address, src_stride, sse);
+ }
+ return besterr;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+
+// TODO(yunqing): this part can be further refactored.
+#if CONFIG_VP9_HIGHBITDEPTH
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ int64_t tmpmse; \
+ const MV mv = { r, c }; \
+ const MV ref_mv = { rr, rc }; \
+ thismse = \
+ accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
+ y, y_stride, second_pred, w, h, &sse); \
+ tmpmse = thismse; \
+ tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \
+ if (tmpmse >= INT_MAX) { \
+ v = INT_MAX; \
+ } else if ((v = (uint32_t)tmpmse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#else
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ const MV mv = { r, c }; \
+ const MV ref_mv = { rr, rc }; \
+ thismse = \
+ accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
+ y, y_stride, second_pred, w, h, &sse); \
+ if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \
+ thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+
+#endif
+
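The high-bitdepth CHECK_BETTER1 accumulates distortion plus MV rate cost in an int64_t and pins overflowing sums to INT_MAX, since at 10/12 bpp the two terms together can exceed 32 bits. The guard, restated as a standalone sketch (saturated_mv_cost is a hypothetical helper, not part of this change):

static INLINE uint32_t saturated_mv_cost(int64_t distortion,
                                         int64_t rate_cost) {
  // A sum pinned to INT_MAX can never win the besterr comparison.
  const int64_t total = distortion + rate_cost;
  return (total >= INT_MAX) ? INT_MAX : (uint32_t)total;
}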
uint32_t vp9_find_best_sub_pixel_tree(
const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
const uint8_t *const z = x->plane[0].src.buf;
const uint8_t *const src_address = z;
const int src_stride = x->plane[0].src.stride;
@@ -650,6 +761,14 @@ uint32_t vp9_find_best_sub_pixel_tree(
int kr, kc;
MvLimits subpel_mv_limits;
+ // TODO(yunqing): need to add 4-tap filter optimization to speed up the
+ // encoder.
+ const InterpKernel *kernel = (use_accurate_subpel_search > 0)
+ ? ((use_accurate_subpel_search == USE_4_TAPS)
+ ? vp9_filter_kernels[FOURTAP]
+ : vp9_filter_kernels[EIGHTTAP])
+ : vp9_filter_kernels[BILINEAR];
+
vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
minc = subpel_mv_limits.col_min;
maxc = subpel_mv_limits.col_max;
@@ -674,16 +793,25 @@ uint32_t vp9_find_best_sub_pixel_tree(
tr = br + search_step[idx].row;
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
MV this_mv;
this_mv.row = tr;
this_mv.col = tc;
- if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse, second_pred);
+
+ if (use_accurate_subpel_search) {
+ thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
+ src_address, src_stride, y,
+ y_stride, second_pred, w, h, &sse);
+ } else {
+ const uint8_t *const pre_address =
+ y + (tr >> 3) * y_stride + (tc >> 3);
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
+
cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
mvcost, error_per_bit);
@@ -705,14 +833,21 @@ uint32_t vp9_find_best_sub_pixel_tree(
tc = bc + kc;
tr = br + kr;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
MV this_mv = { tr, tc };
- if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, &sse, second_pred);
+ if (use_accurate_subpel_search) {
+ thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
+ src_address, src_stride, y, y_stride,
+ second_pred, w, h, &sse);
+ } else {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+ src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
+
cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
error_per_bit);
@@ -743,20 +878,36 @@ uint32_t vp9_find_best_sub_pixel_tree(
if (tr == br && tc != bc) {
kc = bc - tc;
if (iters_per_step == 1) {
- CHECK_BETTER(second, br0, bc0 + kc);
+ if (use_accurate_subpel_search) {
+ CHECK_BETTER1(second, br0, bc0 + kc);
+ } else {
+ CHECK_BETTER(second, br0, bc0 + kc);
+ }
}
} else if (tr != br && tc == bc) {
kr = br - tr;
if (iters_per_step == 1) {
- CHECK_BETTER(second, br0 + kr, bc0);
+ if (use_accurate_subpel_search) {
+ CHECK_BETTER1(second, br0 + kr, bc0);
+ } else {
+ CHECK_BETTER(second, br0 + kr, bc0);
+ }
}
}
if (iters_per_step > 1) {
- CHECK_BETTER(second, br0 + kr, bc0);
- CHECK_BETTER(second, br0, bc0 + kc);
- if (br0 != br || bc0 != bc) {
- CHECK_BETTER(second, br0 + kr, bc0 + kc);
+ if (use_accurate_subpel_search) {
+ CHECK_BETTER1(second, br0 + kr, bc0);
+ CHECK_BETTER1(second, br0, bc0 + kc);
+ if (br0 != br || bc0 != bc) {
+ CHECK_BETTER1(second, br0 + kr, bc0 + kc);
+ }
+ } else {
+ CHECK_BETTER(second, br0 + kr, bc0);
+ CHECK_BETTER(second, br0, bc0 + kc);
+ if (br0 != br || bc0 != bc) {
+ CHECK_BETTER(second, br0 + kr, bc0 + kc);
+ }
}
}
}
@@ -781,6 +932,7 @@ uint32_t vp9_find_best_sub_pixel_tree(
}
#undef CHECK_BETTER
+#undef CHECK_BETTER1
static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
int range) {
@@ -2587,7 +2739,8 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
(void)tc; \
(void)sse; \
(void)thismse; \
- (void)cost_list;
+ (void)cost_list; \
+ (void)use_accurate_subpel_search;
// Return the maximum MV.
uint32_t vp9_return_max_sub_pixel_mv(
@@ -2595,7 +2748,7 @@ uint32_t vp9_return_max_sub_pixel_mv(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
COMMON_MV_TEST;
(void)minr;
@@ -2617,7 +2770,7 @@ uint32_t vp9_return_min_sub_pixel_mv(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
COMMON_MV_TEST;
(void)maxr;
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index adb02bc1a..6bd85a152 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -75,7 +75,7 @@ typedef uint32_t(fractional_mv_step_fp)(
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h);
+ int h, int use_accurate_subpel_search);
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
diff --git a/vp9/encoder/vp9_partition_models.h b/vp9/encoder/vp9_partition_models.h
index 19979e531..904d21400 100644
--- a/vp9/encoder/vp9_partition_models.h
+++ b/vp9/encoder/vp9_partition_models.h
@@ -18,7 +18,9 @@ extern "C" {
#define NN_MAX_HIDDEN_LAYERS 10
#define NN_MAX_NODES_PER_LAYER 128
-// Neural net model config.
+// Neural net model config. It defines the layout of a neural net model: the
+// number of inputs/outputs, the number of layers, the number of nodes in each
+// layer, and the weights and bias of each node.
typedef struct {
int num_inputs; // Number of input nodes, i.e. features.
int num_outputs; // Number of output nodes.
@@ -834,6 +836,308 @@ static const NN_CONFIG vp9_partition_nnconfig_16x16 = {
};
#undef FEATURES
+#if CONFIG_ML_VAR_PARTITION
+#define FEATURES 6
+static const float vp9_var_part_nn_weights_64_layer0[FEATURES * 8] = {
+ -0.249572f, 0.205532f, -2.175608f, 1.094836f, -2.986370f, 0.193160f,
+ -0.143823f, 0.378511f, -1.997788f, -2.166866f, -1.930158f, -1.202127f,
+ -0.611875f, -0.506422f, -0.432487f, 0.071205f, 0.578172f, -0.154285f,
+ -0.051830f, 0.331681f, -1.457177f, -2.443546f, -2.000302f, -1.389283f,
+ 0.372084f, -0.464917f, 2.265235f, 2.385787f, 2.312722f, 2.127868f,
+ -0.403963f, -0.177860f, -0.436751f, -0.560539f, 0.254903f, 0.193976f,
+ -0.305611f, 0.256632f, 0.309388f, -0.437439f, 1.702640f, -5.007069f,
+ -0.323450f, 0.294227f, 1.267193f, 1.056601f, 0.387181f, -0.191215f,
+};
+
+static const float vp9_var_part_nn_bias_64_layer0[8] = {
+ -0.044396f, -0.938166f, 0.000000f, -0.916375f,
+ 1.242299f, 0.000000f, -0.405734f, 0.014206f,
+};
+
+static const float vp9_var_part_nn_weights_64_layer1[8] = {
+ 1.635945f, 0.979557f, 0.455315f, 1.197199f,
+ -2.251024f, -0.464953f, 1.378676f, -0.111927f,
+};
+
+static const float vp9_var_part_nn_bias_64_layer1[1] = {
+ -0.37972447f,
+};
+
+static const NN_CONFIG vp9_var_part_nnconfig_64 = {
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_part_nn_weights_64_layer0,
+ vp9_var_part_nn_weights_64_layer1,
+ },
+ {
+ vp9_var_part_nn_bias_64_layer0,
+ vp9_var_part_nn_bias_64_layer1,
+ },
+};
+
+static const float vp9_var_part_nn_weights_32_layer0[FEATURES * 8] = {
+ 0.067243f, -0.083598f, -2.191159f, 2.726434f, -3.324013f, 3.477977f,
+ 0.323736f, -0.510199f, 2.960693f, 2.937661f, 2.888476f, 2.938315f,
+ -0.307602f, -0.503353f, -0.080725f, -0.473909f, -0.417162f, 0.457089f,
+ 0.665153f, -0.273210f, 0.028279f, 0.972220f, -0.445596f, 1.756611f,
+ -0.177892f, -0.091758f, 0.436661f, -0.521506f, 0.133786f, 0.266743f,
+ 0.637367f, -0.160084f, -1.396269f, 1.020841f, -1.112971f, 0.919496f,
+ -0.235883f, 0.651954f, 0.109061f, -0.429463f, 0.740839f, -0.962060f,
+ 0.299519f, -0.386298f, 1.550231f, 2.464915f, 1.311969f, 2.561612f,
+};
+
+static const float vp9_var_part_nn_bias_32_layer0[8] = {
+ 0.368242f, 0.736617f, 0.000000f, 0.757287f,
+ 0.000000f, 0.613248f, -0.776390f, 0.928497f,
+};
+
+static const float vp9_var_part_nn_weights_32_layer1[8] = {
+ 0.939884f, -2.420850f, -0.410489f, -0.186690f,
+ 0.063287f, -0.522011f, 0.484527f, -0.639625f,
+};
+
+static const float vp9_var_part_nn_bias_32_layer1[1] = {
+ -0.6455006f,
+};
+
+static const NN_CONFIG vp9_var_part_nnconfig_32 = {
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_part_nn_weights_32_layer0,
+ vp9_var_part_nn_weights_32_layer1,
+ },
+ {
+ vp9_var_part_nn_bias_32_layer0,
+ vp9_var_part_nn_bias_32_layer1,
+ },
+};
+
+static const float vp9_var_part_nn_weights_16_layer0[FEATURES * 8] = {
+ 0.742567f, -0.580624f, -0.244528f, 0.331661f, -0.113949f, -0.559295f,
+ -0.386061f, 0.438653f, 1.467463f, 0.211589f, 0.513972f, 1.067855f,
+ -0.876679f, 0.088560f, -0.687483f, -0.380304f, -0.016412f, 0.146380f,
+ 0.015318f, 0.000351f, -2.764887f, 3.269717f, 2.752428f, -2.236754f,
+ 0.561539f, -0.852050f, -0.084667f, 0.202057f, 0.197049f, 0.364922f,
+ -0.463801f, 0.431790f, 1.872096f, -0.091887f, -0.055034f, 2.443492f,
+ -0.156958f, -0.189571f, -0.542424f, -0.589804f, -0.354422f, 0.401605f,
+ 0.642021f, -0.875117f, 2.040794f, 1.921070f, 1.792413f, 1.839727f,
+};
+
+static const float vp9_var_part_nn_bias_16_layer0[8] = {
+ 2.901234f, -1.940932f, -0.198970f, -0.406524f,
+ 0.059422f, -1.879207f, -0.232340f, 2.979821f,
+};
+
+static const float vp9_var_part_nn_weights_16_layer1[8] = {
+ -0.528731f, 0.375234f, -0.088422f, 0.668629f,
+ 0.870449f, 0.578735f, 0.546103f, -1.957207f,
+};
+
+static const float vp9_var_part_nn_bias_16_layer1[1] = {
+ -1.95769405f,
+};
+
+static const NN_CONFIG vp9_var_part_nnconfig_16 = {
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_part_nn_weights_16_layer0,
+ vp9_var_part_nn_weights_16_layer1,
+ },
+ {
+ vp9_var_part_nn_bias_16_layer0,
+ vp9_var_part_nn_bias_16_layer1,
+ },
+};
+#undef FEATURES
+#endif // CONFIG_ML_VAR_PARTITION
+
+#define FEATURES 6
+#define LABELS 1
+static const float vp9_var_rd_part_nn_weights_64_layer0[FEATURES * 8] = {
+ -0.100129f, 0.128867f, -1.375086f, -2.268096f, -1.470368f, -2.296274f,
+ 0.034445f, -0.062993f, -2.151904f, 0.523215f, 1.611269f, 1.530051f,
+ 0.418182f, -1.330239f, 0.828388f, 0.386546f, -0.026188f, -0.055459f,
+ -0.474437f, 0.861295f, -2.208743f, -0.652991f, -2.985873f, -1.728956f,
+ 0.388052f, -0.420720f, 2.015495f, 1.280342f, 3.040914f, 1.760749f,
+ -0.009062f, 0.009623f, 1.579270f, -2.012891f, 1.629662f, -1.796016f,
+ -0.279782f, -0.288359f, 1.875618f, 1.639855f, 0.903020f, 0.906438f,
+ 0.553394f, -1.621589f, 0.185063f, 0.605207f, -0.133560f, 0.588689f,
+};
+
+static const float vp9_var_rd_part_nn_bias_64_layer0[8] = {
+ 0.659717f, 0.120912f, 0.329894f, -1.586385f,
+ 1.715839f, 0.085754f, 2.038774f, 0.268119f,
+};
+
+static const float vp9_var_rd_part_nn_weights_64_layer1[8 * LABELS] = {
+ -3.445586f, 2.375620f, 1.236970f, 0.804030f,
+ -2.448384f, 2.827254f, 2.291478f, 0.790252f,
+};
+
+static const float vp9_var_rd_part_nn_bias_64_layer1[LABELS] = {
+ -1.16608453f,
+};
+
+static const NN_CONFIG vp9_var_rd_part_nnconfig_64 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_rd_part_nn_weights_64_layer0,
+ vp9_var_rd_part_nn_weights_64_layer1,
+ },
+ {
+ vp9_var_rd_part_nn_bias_64_layer0,
+ vp9_var_rd_part_nn_bias_64_layer1,
+ },
+};
+
+static const float vp9_var_rd_part_nn_weights_32_layer0[FEATURES * 8] = {
+ 0.022420f, -0.032201f, 1.228065f, -2.767655f, 1.928743f, 0.566863f,
+ 0.459229f, 0.422048f, 0.833395f, 0.822960f, -0.232227f, 0.586895f,
+ 0.442856f, -0.018564f, 0.227672f, -1.291306f, 0.119428f, -0.776563f,
+ -0.042947f, 0.183129f, 0.592231f, 1.174859f, -0.503868f, 0.270102f,
+ -0.330537f, -0.036340f, 1.144630f, 1.783710f, 1.216929f, 2.038085f,
+ 0.373782f, -0.430258f, 1.957002f, 1.383908f, 2.012261f, 1.585693f,
+ -0.394399f, -0.337523f, -0.238335f, 0.007819f, -0.368294f, 0.437875f,
+ -0.318923f, -0.242000f, 2.276263f, 1.501432f, 0.645706f, 0.344774f,
+};
+
+static const float vp9_var_rd_part_nn_bias_32_layer0[8] = {
+ -0.023846f, -1.348117f, 1.365007f, -1.644164f,
+ 0.062992f, 1.257980f, -0.098642f, 1.388472f,
+};
+
+static const float vp9_var_rd_part_nn_weights_32_layer1[8 * LABELS] = {
+ 3.016729f, 0.622684f, -1.021302f, 1.490383f,
+ 1.702046f, -2.964618f, 0.689045f, 1.711754f,
+};
+
+static const float vp9_var_rd_part_nn_bias_32_layer1[LABELS] = {
+ -1.28798676f,
+};
+
+static const NN_CONFIG vp9_var_rd_part_nnconfig_32 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_rd_part_nn_weights_32_layer0,
+ vp9_var_rd_part_nn_weights_32_layer1,
+ },
+ {
+ vp9_var_rd_part_nn_bias_32_layer0,
+ vp9_var_rd_part_nn_bias_32_layer1,
+ },
+};
+
+static const float vp9_var_rd_part_nn_weights_16_layer0[FEATURES * 8] = {
+ -0.726813f, -0.026748f, 1.376946f, 1.467961f, 1.961810f, 1.690412f,
+ 0.596484f, -0.261486f, -0.310905f, -0.366311f, -1.300086f, -0.534336f,
+ 0.040520f, -0.032391f, -1.194214f, 2.438063f, -3.915334f, 1.997270f,
+ 0.673696f, -0.676393f, 1.654886f, 1.553838f, 1.129691f, 1.360201f,
+ 0.255001f, 0.336442f, -0.487759f, -0.634555f, 0.479170f, -0.110475f,
+ -0.661852f, -0.158872f, -0.350243f, -0.303957f, -0.045018f, 0.586151f,
+ -0.262463f, 0.228079f, -1.688776f, -1.594502f, -2.261078f, -1.802535f,
+ 0.034748f, -0.028476f, 2.713258f, 0.212446f, -1.529202f, -2.560178f,
+};
+
+static const float vp9_var_rd_part_nn_bias_16_layer0[8] = {
+ 0.495983f, 1.858545f, 0.162974f, 1.992247f,
+ -2.698863f, 0.110020f, 0.550830f, 0.420941f,
+};
+
+static const float vp9_var_rd_part_nn_weights_16_layer1[8 * LABELS] = {
+ 1.768409f, -1.394240f, 1.076846f, -1.762808f,
+ 1.517405f, 0.535195f, -0.426827f, 1.002272f,
+};
+
+static const float vp9_var_rd_part_nn_bias_16_layer1[LABELS] = {
+ -1.65894794f,
+};
+
+static const NN_CONFIG vp9_var_rd_part_nnconfig_16 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_rd_part_nn_weights_16_layer0,
+ vp9_var_rd_part_nn_weights_16_layer1,
+ },
+ {
+ vp9_var_rd_part_nn_bias_16_layer0,
+ vp9_var_rd_part_nn_bias_16_layer1,
+ },
+};
+
+static const float vp9_var_rd_part_nn_weights_8_layer0[FEATURES * 8] = {
+ -0.804900f, -1.214983f, 0.840202f, 0.686566f, 0.155804f, 0.025542f,
+ -1.244635f, -0.368403f, 0.364150f, 1.081073f, 0.552387f, 0.452715f,
+ 0.652968f, -0.293058f, 0.048967f, 0.021240f, -0.662981f, 0.424700f,
+ 0.008293f, -0.013088f, 0.747007f, -1.453907f, -1.498226f, 1.593252f,
+ -0.239557f, -0.143766f, 0.064311f, 1.320998f, -0.477411f, 0.026374f,
+ 0.730884f, -0.675124f, 0.965521f, 0.863658f, 0.809186f, 0.812280f,
+ 0.513131f, 0.185102f, 0.211354f, 0.793666f, 0.121714f, -0.015383f,
+ -0.650980f, -0.046581f, 0.911141f, 0.806319f, 0.974773f, 0.815893f,
+};
+
+static const float vp9_var_rd_part_nn_bias_8_layer0[8] = {
+ 0.176134f, 0.651308f, 2.007761f, 0.068812f,
+ 1.061517f, 1.487161f, -2.308147f, 1.099828f,
+};
+
+static const float vp9_var_rd_part_nn_weights_8_layer1[8 * LABELS] = {
+ 0.683032f, 1.326393f, -1.661539f, 1.438920f,
+ 1.118023f, -2.237380f, 1.518468f, 2.010416f,
+};
+
+static const float vp9_var_rd_part_nn_bias_8_layer1[LABELS] = {
+ -1.65423989f,
+};
+
+static const NN_CONFIG vp9_var_rd_part_nnconfig_8 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_rd_part_nn_weights_8_layer0,
+ vp9_var_rd_part_nn_weights_8_layer1,
+ },
+ {
+ vp9_var_rd_part_nn_bias_8_layer0,
+ vp9_var_rd_part_nn_bias_8_layer1,
+ },
+};
+#undef FEATURES
+#undef LABELS
+
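All of the tables above feed NN_CONFIG instances with one hidden layer of eight nodes and a single output. A minimal sketch of how such a config is evaluated, assuming the ReLU hidden layer and linear output used by the encoder's other partition models (nn_eval_sketch is a hypothetical helper, not part of this change):

static float nn_eval_sketch(const float *features, const NN_CONFIG *cfg) {
  float hidden[NN_MAX_NODES_PER_LAYER];
  float out;
  int i, j;
  // Hidden layer: biased dot product followed by ReLU.
  for (j = 0; j < cfg->num_hidden_nodes[0]; ++j) {
    const float *w = &cfg->weights[0][j * cfg->num_inputs];
    float sum = cfg->bias[0][j];
    for (i = 0; i < cfg->num_inputs; ++i) sum += w[i] * features[i];
    hidden[j] = (sum > 0.0f) ? sum : 0.0f;
  }
  // Output layer: linear; the caller thresholds the returned score.
  out = cfg->bias[1][0];
  for (j = 0; j < cfg->num_hidden_nodes[0]; ++j)
    out += cfg->weights[1][j] * hidden[j];
  return out;
}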
 // Partition pruning model (linear).
static const float vp9_partition_feature_mean[24] = {
303501.697372f, 3042630.372158f, 24.694696f, 1.392182f,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 416d437e0..8dce4cf7b 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -247,7 +247,8 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0,
+ cpi->sf.use_accurate_subpel_search);
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT);
}
@@ -1539,7 +1540,8 @@ static int search_new_mv(VP9_COMP *cpi, MACROBLOCK *x,
cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0,
+ cpi->sf.use_accurate_subpel_search);
} else if (svc->use_base_mv && svc->spatial_layer_id) {
if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) {
const int pre_stride = xd->plane[0].pre[0].stride;
@@ -2758,7 +2760,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &dummy_dist,
- &x->pred_sse[ref_frame], NULL, 0, 0);
+ &x->pred_sse[ref_frame], NULL, 0, 0,
+ cpi->sf.use_accurate_subpel_search);
xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv;
} else {
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 86d2fa18c..76e310ac2 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -777,13 +777,19 @@ static int get_kf_active_quality(const RATE_CONTROL *const rc, int q,
kf_low_motion_minq, kf_high_motion_minq);
}
-static int get_gf_active_quality(const RATE_CONTROL *const rc, int q,
+static int get_gf_active_quality(const VP9_COMP *const cpi, int q,
vpx_bit_depth_t bit_depth) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ const RATE_CONTROL *const rc = &cpi->rc;
+
int *arfgf_low_motion_minq;
int *arfgf_high_motion_minq;
+ const int gfu_boost = cpi->multi_layer_arf
+ ? gf_group->gfu_boost[gf_group->index]
+ : rc->gfu_boost;
ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq);
ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
- return get_active_quality(q, rc->gfu_boost, gf_low, gf_high,
+ return get_active_quality(q, gfu_boost, gf_low, gf_high,
arfgf_low_motion_minq, arfgf_high_motion_minq);
}
@@ -935,7 +941,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
} else {
q = active_worst_quality;
}
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
} else {
// Use the lower of active_worst_quality and recent/average Q.
if (cm->current_video_frame > 1) {
@@ -1082,7 +1088,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
if (oxcf->rc_mode == VPX_CQ) {
if (q < cq_level) q = cq_level;
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
     // Constrained quality uses slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
@@ -1097,7 +1103,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
delta_qindex = vp9_compute_qdelta(rc, q, q * 0.50, cm->bit_depth);
active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
}
} else {
if (oxcf->rc_mode == VPX_Q) {
@@ -1278,7 +1284,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
if (oxcf->rc_mode == VPX_CQ) {
if (q < cq_level) q = cq_level;
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
     // Constrained quality uses slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
@@ -1287,7 +1293,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
} else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
// Modify best quality for second level arfs. For mode VPX_Q this
// becomes the baseline frame q.
@@ -1295,7 +1301,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
active_best_quality = (active_best_quality + cq_level + 1) / 2;
}
} else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
}
} else {
if (oxcf->rc_mode == VPX_Q) {
@@ -1445,6 +1451,12 @@ void vp9_configure_buffer_updates(VP9_COMP *cpi, int gf_group_index) {
cpi->refresh_alt_ref_frame = 0;
cpi->rc.is_src_frame_alt_ref = 1;
break;
+ case MID_OVERLAY_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ break;
case USE_BUF_FRAME:
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 698faa343..9cde479cd 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1821,7 +1821,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0,
cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred, pw, ph);
+ &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
@@ -1875,6 +1875,8 @@ static int64_t rd_pick_best_sub8x8_mode(
const BLOCK_SIZE bsize = mi->sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int pw = num_4x4_blocks_wide << 2;
+ const int ph = num_4x4_blocks_high << 2;
ENTROPY_CONTEXT t_above[2], t_left[2];
int subpelmv = 1, have_ref = 0;
SPEED_FEATURES *const sf = &cpi->sf;
@@ -2011,7 +2013,8 @@ static int64_t rd_pick_best_sub8x8_mode(
x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop,
sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &distortion,
- &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0);
+ &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph,
+ cpi->sf.use_accurate_subpel_search);
// save motion search result for use in compound prediction
seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv;
@@ -2330,6 +2333,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
const int best_predmv_idx = x->mv_best_ref_index[ref];
const YV12_BUFFER_CONFIG *scaled_ref_frame =
vp9_get_scaled_ref_frame(cpi, ref);
+ const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
+ const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
MV pred_mv[3];
pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
@@ -2452,7 +2457,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph,
+ cpi->sf.use_accurate_subpel_search);
}
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index b54587931..87b417a4b 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -219,6 +219,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->less_rectangular_check = 1;
sf->use_square_partition_only = !boosted;
sf->prune_ref_frame_for_rect_partitions = 1;
+ sf->ml_var_partition_pruning = 1;
sf->ml_prune_rect_partition_threhold[0] = -1;
sf->ml_prune_rect_partition_threhold[1] = 350;
@@ -241,6 +242,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
if (speed >= 1) {
sf->enable_tpl_model = 0;
+ sf->ml_var_partition_pruning = !boosted;
sf->ml_prune_rect_partition_threhold[1] = 200;
sf->ml_prune_rect_partition_threhold[2] = 200;
sf->ml_prune_rect_partition_threhold[3] = 200;
@@ -286,9 +288,11 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
: INT_MAX;
+ sf->use_accurate_subpel_search = USE_4_TAPS;
}
if (speed >= 2) {
+ sf->ml_var_partition_pruning = 0;
if (oxcf->vbr_corpus_complexity)
sf->recode_loop = ALLOW_RECODE_FIRST;
else
@@ -326,6 +330,8 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
}
+
+ sf->use_accurate_subpel_search = USE_2_TAPS;
}
if (speed >= 3) {
@@ -448,6 +454,7 @@ static void set_rt_speed_feature_framesize_independent(
sf->disable_golden_ref = 0;
sf->enable_tpl_model = 0;
sf->enhanced_full_pixel_motion_search = 0;
+ sf->use_accurate_subpel_search = USE_2_TAPS;
if (speed >= 1) {
sf->allow_txfm_domain_distortion = 1;
@@ -623,7 +630,18 @@ static void set_rt_speed_feature_framesize_independent(
sf->use_altref_onepass = 1;
sf->use_compound_nonrd_pickmode = 1;
}
+#if CONFIG_ML_VAR_PARTITION
+ if (!frame_is_intra_only(cm) && cm->width >= 360 && cm->height >= 360)
+ sf->partition_search_type = ML_BASED_PARTITION;
+ else
+ sf->partition_search_type = VAR_BASED_PARTITION;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
+ sf->partition_search_type = VAR_BASED_PARTITION;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#else
sf->partition_search_type = VAR_BASED_PARTITION;
+#endif // CONFIG_ML_VAR_PARTITION
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
@@ -928,6 +946,8 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->ml_prune_rect_partition_threhold[1] = -1;
sf->ml_prune_rect_partition_threhold[2] = -1;
sf->ml_prune_rect_partition_threhold[3] = -1;
+ sf->ml_var_partition_pruning = 0;
+ sf->use_accurate_subpel_search = USE_8_TAPS;
   // Some speed-up features are used even for best quality, as they have
   // minimal impact on quality.
sf->adaptive_rd_thresh = 1;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 531df704c..0067bb4ac 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -136,20 +136,25 @@ typedef enum {
} INTERP_FILTER_MASK;
typedef enum {
- // Search partitions using RD/NONRD criterion
+ // Search partitions using RD/NONRD criterion.
SEARCH_PARTITION,
- // Always use a fixed size partition
+ // Always use a fixed size partition.
FIXED_PARTITION,
REFERENCE_PARTITION,
// Use an arbitrary partitioning scheme based on source variance within
- // a 64X64 SB
+ // a 64X64 SB.
VAR_BASED_PARTITION,
- // Use non-fixed partitions based on source variance
- SOURCE_VAR_BASED_PARTITION
+ // Use non-fixed partitions based on source variance.
+ SOURCE_VAR_BASED_PARTITION,
+
+#if CONFIG_ML_VAR_PARTITION
+ // Make partition decisions with machine learning models.
+ ML_BASED_PARTITION
+#endif // CONFIG_ML_VAR_PARTITION
} PARTITION_SEARCH_TYPE;
typedef enum {
@@ -238,6 +243,12 @@ typedef enum {
RE_ENCODE_MAXQ = 2
} OVERSHOOT_DETECTION_CBR_RT;
+typedef enum {
+ USE_2_TAPS = 0,
+ USE_4_TAPS,
+ USE_8_TAPS,
+} SUBPEL_SEARCH_TYPE;
+
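For reference, vp9_find_best_sub_pixel_tree maps these values onto interpolation kernels as sketched below (subpel_search_kernel is a hypothetical helper restating the ternary in vp9_mcomp.c); with USE_2_TAPS the accurate path is skipped entirely and the search falls back to the svf/svaf calls, which interpolate bilinearly:

static INLINE const InterpKernel *subpel_search_kernel(
    SUBPEL_SEARCH_TYPE type) {
  switch (type) {
    case USE_4_TAPS: return vp9_filter_kernels[FOURTAP];
    case USE_8_TAPS: return vp9_filter_kernels[EIGHTTAP];
    default: return vp9_filter_kernels[BILINEAR];  // USE_2_TAPS
  }
}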
typedef struct SPEED_FEATURES {
MV_SPEED_FEATURES mv;
@@ -510,6 +521,10 @@ typedef struct SPEED_FEATURES {
// Machine-learning based partition search early termination
int ml_partition_search_early_termination;
+ // Machine-learning based partition search pruning using prediction residue
+ // variance.
+ int ml_var_partition_pruning;
+
// Allow skipping partition search for still image frame
int allow_partition_search_skip;
@@ -577,6 +592,10 @@ typedef struct SPEED_FEATURES {
// Allow for disabling golden reference.
int disable_golden_ref;
+
+  // Allow sub-pixel search to use interpolation filters with different numbers
+  // of taps in order to achieve more accurate motion search results.
+ SUBPEL_SEARCH_TYPE use_accurate_subpel_search;
} SPEED_FEATURES;
struct VP9_COMP;
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 51668d01d..7ac70c8ea 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -421,12 +421,13 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
/* restore UMV window */
x->mv_limits = tmp_mv_limits;
+  // TODO(yunqing): may use an interp filter with more than 2 taps if needed.
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(
x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_search_level,
- cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
- 0);
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
+ USE_2_TAPS);
// Restore input state
x->plane[0].src = src;
diff --git a/vp9/vp9_dx_iface.c b/vp9/vp9_dx_iface.c
index fdff87768..6a4cb9acf 100644
--- a/vp9/vp9_dx_iface.c
+++ b/vp9/vp9_dx_iface.c
@@ -270,6 +270,9 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
RANGE_CHECK(ctx, row_mt, 0, 1);
ctx->pbi->row_mt = ctx->row_mt;
+ RANGE_CHECK(ctx, lpf_opt, 0, 1);
+ ctx->pbi->lpf_mt_opt = ctx->lpf_opt;
+
// If postprocessing was enabled by the application and a
// configuration has not been provided, default it.
if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
@@ -658,6 +661,13 @@ static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_enable_lpf_opt(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->lpf_opt = va_arg(args, int);
+
+ return VPX_CODEC_OK;
+}
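On the application side, the new control is issued like VP9D_SET_ROW_MT. A minimal usage sketch (open_vp9_decoder is a hypothetical wrapper; it assumes the matching VP9D_SET_LOOP_FILTER_OPT control ID added alongside this change in vpx/vp8dx.h):

#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"

static int open_vp9_decoder(vpx_codec_ctx_t *decoder) {
  vpx_codec_dec_cfg_t dec_cfg = { 4, 0, 0 };  // threads, w, h
  if (vpx_codec_dec_init(decoder, vpx_codec_vp9_dx(), &dec_cfg, 0) !=
      VPX_CODEC_OK)
    return -1;
  // Both controls take 0 or 1; init_decoder() range-checks the values.
  vpx_codec_control(decoder, VP9D_SET_ROW_MT, 1);
  vpx_codec_control(decoder, VP9D_SET_LOOP_FILTER_OPT, 1);
  return 0;
}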
+
static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP8_COPY_REFERENCE, ctrl_copy_reference },
@@ -670,6 +680,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter },
{ VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc },
{ VP9D_SET_ROW_MT, ctrl_set_row_mt },
+ { VP9D_SET_LOOP_FILTER_OPT, ctrl_enable_lpf_opt },
// Getters
{ VPXD_GET_LAST_QUANTIZER, ctrl_get_quantizer },
diff --git a/vp9/vp9_dx_iface.h b/vp9/vp9_dx_iface.h
index a1c335278..f60688c4d 100644
--- a/vp9/vp9_dx_iface.h
+++ b/vp9/vp9_dx_iface.h
@@ -46,6 +46,7 @@ struct vpx_codec_alg_priv {
int svc_decoding;
int svc_spatial_layer;
int row_mt;
+ int lpf_opt;
};
#endif // VPX_VP9_VP9_DX_IFACE_H_