summaryrefslogtreecommitdiff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/encoder/vp9_encodeframe.c | 74
-rw-r--r-- vp9/encoder/vp9_encoder.c | 353
-rw-r--r-- vp9/encoder/vp9_encoder.h | 2
-rw-r--r-- vp9/encoder/vp9_firstpass.c | 17
-rw-r--r-- vp9/encoder/vp9_mcomp.c | 5
-rw-r--r-- vp9/encoder/vp9_mcomp.h | 6
-rw-r--r-- vp9/encoder/vp9_pickmode.c | 15
-rw-r--r-- vp9/encoder/vp9_rdopt.c | 2
-rw-r--r-- vp9/encoder/vp9_temporal_filter.c | 7
-rw-r--r-- vp9/encoder/x86/temporal_filter_sse4.c | 4
-rw-r--r-- vp9/vp9_common.mk | 3
-rw-r--r-- vp9/vp9cx.mk | 3
12 files changed, 414 insertions, 77 deletions
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 5adefac1a..679a62b5a 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3440,6 +3440,45 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x,
#undef FEATURES
#undef LABELS
+// Coarse, fast motion search for a single block. The best full-pixel vector
+// found is scaled by 8, stored in the block's mode info, and used to build a
+// luma inter prediction into pred_buf (written with a stride of 64). This is a
+// pre-processing step for the ML based partition search speedup.
+static void simple_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
+                                 int mi_row, int mi_col, MV ref_mv,
+                                 MV_REFERENCE_FRAME ref, uint8_t *pred_buf) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MODE_INFO *const mi = xd->mi[0];
+  const YV12_BUFFER_CONFIG *const ref_buf = get_ref_frame_buffer(cpi, ref);
+  // Snapshot of the search window so it can be put back after the search.
+  const MvLimits saved_mv_limits = x->mv_limits;
+  const int sad_per_bit = x->sadperbit16;
+  const int search_step = 1;
+  // The search start point is the reference vector scaled down by 8.
+  MV start_mv = { ref_mv.row >> 3, ref_mv.col >> 3 };
+  MV found_mv = { 0, 0 };
+  int cost_list[5];
+
+  assert(ref_buf != NULL);
+  if (ref_buf == NULL) return;
+
+  // Mark the block as a single-reference block of size bsize predicted from
+  // the requested reference frame.
+  vp9_setup_pre_planes(xd, 0, ref_buf, mi_row, mi_col,
+                       &cm->frame_refs[ref - 1].sf);
+  mi->ref_frame[0] = ref;
+  mi->ref_frame[1] = NONE;
+  mi->sb_type = bsize;
+
+  vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
+  vp9_full_pixel_search(cpi, x, bsize, &start_mv, search_step, NSTEP,
+                        sad_per_bit, cond_cost_list(cpi, cost_list), &ref_mv,
+                        &found_mv, 0, 0);
+  x->mv_limits = saved_mv_limits;
+
+  // Scale the full-pixel result back up by 8 before storing it.
+  found_mv.row *= 8;
+  found_mv.col *= 8;
+  mi->mv[0].as_mv = found_mv;
+
+  // Build the luma prediction for this vector directly into pred_buf.
+  set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+  xd->plane[0].dst.buf = pred_buf;
+  xd->plane[0].dst.stride = 64;
+  vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+}
+
// Use a neural net model to prune partition-none and partition-split search.
// The model uses prediction residue variance and quantization step size as
// input features.
@@ -3448,10 +3487,9 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row,
int mi_col, int *none, int *split) {
VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *xd = &x->e_mbd;
- MODE_INFO *mi = xd->mi[0];
const NN_CONFIG *nn_config = NULL;
#if CONFIG_VP9_HIGHBITDEPTH
+ MACROBLOCKD *xd = &x->e_mbd;
DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]);
uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
? (CONVERT_TO_BYTEPTR(pred_buffer))
@@ -3489,41 +3527,13 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
if (!nn_config) return;
- mi->ref_frame[1] = NONE;
- mi->sb_type = bsize;
// Do a simple single motion search to find a prediction for current block.
// The variance of the residue will be used as input features.
{
+ const MV ref_mv = { 0, 0 };
const MV_REFERENCE_FRAME ref =
cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
- YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref);
- MV ref_mv = { 0, 0 };
- MV ref_mv_full = { 0, 0 };
- const int step_param = 1;
- const MvLimits tmp_mv_limits = x->mv_limits;
- const SEARCH_METHODS search_method = NSTEP;
- const int sadpb = x->sadperbit16;
- MV best_mv = { 0, 0 };
- int cost_list[5];
-
- assert(yv12 != NULL);
- if (!yv12) return;
- vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
- &cm->frame_refs[ref - 1].sf);
- mi->ref_frame[0] = ref;
- vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
- vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param,
- search_method, sadpb, cond_cost_list(cpi, cost_list),
- &ref_mv, &best_mv, 0, 0);
- best_mv.row *= 8;
- best_mv.col *= 8;
- x->mv_limits = tmp_mv_limits;
- mi->mv[0].as_mv = best_mv;
-
- set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
- xd->plane[0].dst.buf = pred_buf;
- xd->plane[0].dst.stride = 64;
- vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf);
}
vpx_clear_system_state();
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 11037cd14..eaeb3d96e 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -29,6 +29,9 @@
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_idct.h"
+#if CONFIG_NON_GREEDY_MV
+#include "vp9/common/vp9_mvref_common.h"
+#endif
#if CONFIG_VP9_POSTPROC
#include "vp9/common/vp9_postproc.h"
#endif
@@ -2570,6 +2573,7 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
vpx_free(cpi->feature_score_loc_arr);
vpx_free(cpi->feature_score_loc_sort);
vpx_free(cpi->feature_score_loc_heap);
+ vpx_free(cpi->select_mv_arr);
#endif
for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
#if CONFIG_NON_GREEDY_MV
@@ -5838,31 +5842,6 @@ static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
}
}
-#if CONFIG_NON_GREEDY_MV
-double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows, int cols) {
- double IxIx = 0;
- double IxIy = 0;
- double IyIy = 0;
- double score;
- int r, c;
- vpx_clear_system_state();
- for (r = 0; r + 1 < rows; ++r) {
- for (c = 0; c + 1 < cols; ++c) {
- int diff_x = buf[r * stride + c] - buf[r * stride + c + 1];
- int diff_y = buf[r * stride + c] - buf[(r + 1) * stride + c];
- IxIx += diff_x * diff_x;
- IxIy += diff_x * diff_y;
- IyIy += diff_y * diff_y;
- }
- }
- IxIx /= (rows - 1) * (cols - 1);
- IxIy /= (rows - 1) * (cols - 1);
- IyIy /= (rows - 1) * (cols - 1);
- score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001);
- return score;
-}
-#endif
-
static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
int mi_col) {
x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
@@ -6035,6 +6014,326 @@ static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
}
#if CONFIG_NON_GREEDY_MV
+// Fills in the luma source (src) and reference (pre) buffer descriptors for
+// the block whose top-left corner is at (mi_row, mi_col).
+//
+// The reference picture is gf_picture[frame_idx].ref_frame[rf_idx]; callers
+// must only pass an rf_idx whose reference index is not -1. If that
+// precondition is violated, the assert fires in debug builds and pre is set to
+// { NULL, 0 } instead of dereferencing a NULL frame pointer here.
+static void get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture,
+                                   int frame_idx, int rf_idx, int mi_row,
+                                   int mi_col, struct buf_2d *src,
+                                   struct buf_2d *pre) {
+  const int mb_y_offset =
+      mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+  const int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx];
+  const YV12_BUFFER_CONFIG *ref_frame;
+
+  src->buf = xd->cur_buf->y_buffer + mb_y_offset;
+  src->stride = xd->cur_buf->y_stride;
+
+  assert(ref_frame_idx != -1);
+  if (ref_frame_idx == -1) {
+    // No reference picture for this slot: leave pre in a well-defined state.
+    pre->buf = NULL;
+    pre->stride = 0;
+    return;
+  }
+
+  ref_frame = gf_picture[ref_frame_idx].frame;
+  // The same byte offset is used for both buffers, which is only valid when
+  // their strides match (checked below).
+  pre->buf = ref_frame->y_buffer + mb_y_offset;
+  pre->stride = ref_frame->y_stride;
+  assert(src->stride == pre->stride);
+}
+
+// kMvPreCheckSize is the number of entries in a triangle made of
+// kMvPreCheckLines diagonals (see the assert in predict_mv_mode()).
+#define kMvPreCheckLines 5
+#define kMvPreCheckSize 15
+// Motion vector modes; MAX_MV_MODE is the number of modes.
+#define ZERO_MV_MODE 0
+#define NEW_MV_MODE 1
+#define NEAREST_MV_MODE 2
+#define NEAR_MV_MODE 3
+#define MAX_MV_MODE 4
+
+// Neighbor offsets consulted by find_ref_mv(), in units of one block. The
+// table is read-only and private to this file.
+#define MV_REF_POS_NUM 3
+static const POSITION mv_ref_pos[MV_REF_POS_NUM] = {
+  { -1, 0 },
+  { 0, -1 },
+  { -1, -1 },
+};
+
+// Returns the slot in cpi->select_mv_arr for position (mi_row, mi_col), using
+// the row stride of tpl_frame.
+static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row,
+                             int mi_col) {
+  const int offset = mi_row * tpl_frame->stride + mi_col;
+  return cpi->select_mv_arr + offset;
+}
+
+// Derives the "nearest" and "near" vectors for the block at (mi_row, mi_col)
+// from the selected vectors of its neighbors listed in mv_ref_pos, and returns
+// the one asked for by mv_mode (NEAREST_MV_MODE or NEAR_MV_MODE).
+//
+// "nearest" is the vector of the first neighbor inside the frame; "near" is
+// the first later neighbor vector that differs from it. Either falls back to
+// the zero vector when no such neighbor exists. Any other mv_mode asserts and
+// returns INVALID_MV.
+static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame,
+                          BLOCK_SIZE bsize, int mi_row, int mi_col) {
+  const int mi_height = num_8x8_blocks_high_lookup[bsize];
+  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+  int_mv nearest_mv, near_mv, invalid_mv;
+  int pos;
+
+  nearest_mv.as_int = INVALID_MV;
+  near_mv.as_int = INVALID_MV;
+  invalid_mv.as_int = INVALID_MV;
+
+  for (pos = 0; pos < MV_REF_POS_NUM; ++pos) {
+    const int nb_row = mi_row + mv_ref_pos[pos].row * mi_height;
+    const int nb_col = mi_col + mv_ref_pos[pos].col * mi_width;
+    int_mv candidate;
+    assert(mv_ref_pos[pos].row <= 0);
+    assert(mv_ref_pos[pos].col <= 0);
+    // Neighbors above or left of the frame edge contribute nothing.
+    if (nb_row < 0 || nb_col < 0) continue;
+
+    candidate = *get_select_mv(cpi, tpl_frame, nb_row, nb_col);
+    if (nearest_mv.as_int == INVALID_MV) {
+      nearest_mv = candidate;
+    } else if (candidate.as_int != nearest_mv.as_int) {
+      near_mv = candidate;
+      break;
+    }
+  }
+
+  // Missing candidates default to the zero vector.
+  if (nearest_mv.as_int == INVALID_MV) {
+    nearest_mv.as_mv.row = 0;
+    nearest_mv.as_mv.col = 0;
+  }
+  if (near_mv.as_int == INVALID_MV) {
+    near_mv.as_mv.row = 0;
+    near_mv.as_mv.col = 0;
+  }
+
+  switch (mv_mode) {
+    case NEAREST_MV_MODE: return nearest_mv;
+    case NEAR_MV_MODE: return near_mv;
+    default: break;
+  }
+  assert(0);
+  return invalid_mv;
+}
+
+// Maps a motion vector mode to the vector it stands for at (mi_row, mi_col):
+// zero, the pyramid search result for rf_idx, or a neighbor-derived reference
+// vector. An unknown mode asserts and yields INVALID_MV.
+static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi,
+                                  TplDepFrame *tpl_frame, int rf_idx,
+                                  BLOCK_SIZE bsize, int mi_row, int mi_col) {
+  int_mv result;
+  if (mv_mode == ZERO_MV_MODE) {
+    result.as_mv.row = 0;
+    result.as_mv.col = 0;
+  } else if (mv_mode == NEW_MV_MODE) {
+    result = *get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);
+  } else if (mv_mode == NEAREST_MV_MODE || mv_mode == NEAR_MV_MODE) {
+    // Both reference modes are resolved by the same neighbor scan.
+    result = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col);
+  } else {
+    result.as_int = INVALID_MV;
+    assert(0);
+  }
+  return result;
+}
+
+// Resolves mv_mode to a vector (returned through *mv) and returns, as a
+// double, the sse reported by the bsize variance function between the source
+// block and the reference block displaced by the full-pixel part of that
+// vector.
+static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd,
+                          GF_PICTURE *gf_picture, int frame_idx,
+                          TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize,
+                          int mi_row, int mi_col, int_mv *mv) {
+  struct buf_2d src_buf;
+  struct buf_2d ref_buf;
+  MV full_pel_mv;
+  uint32_t block_sse;
+
+  *mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row,
+                            mi_col);
+  full_pel_mv = get_full_mv(&mv->as_mv);
+
+  get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col,
+                         &src_buf, &ref_buf);
+
+  // TODO(angiebird): Consider subpixel when computing the sse.
+  // Only the sse output is used; the function's return value is ignored.
+  cpi->fn_ptr[bsize].vf(src_buf.buf, src_buf.stride,
+                        get_buf_from_mv(&ref_buf, &full_pel_mv), ref_buf.stride,
+                        &block_sse);
+  return (double)block_sse;
+}
+
+// Rate cost of signalling mv_mode. Currently a placeholder that always
+// returns 0.
+static double get_mv_cost(int mv_mode) {
+  // TODO(angiebird): Implement this function.
+  const double placeholder_cost = 0;
+  (void)mv_mode;
+  return placeholder_cost;
+}
+
+// Combines rate and distortion into one cost:
+//   rate * rdmult / 2^9 + dist * 2^rddiv
+static double rd_cost(int rdmult, int rddiv, double rate, double dist) {
+  const double rate_term = (rate * rdmult) / (1 << 9);
+  const double dist_term = dist * (1 << rddiv);
+  return rate_term + dist_term;
+}
+
+// Returns the rate-distortion cost of coding the block at (mi_row, mi_col)
+// with mv_mode, using the macroblock's rdmult/rddiv. The vector implied by the
+// mode is written to *mv.
+static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x,
+                           GF_PICTURE *gf_picture, int frame_idx,
+                           TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize,
+                           int mi_row, int mi_col, int_mv *mv) {
+  const double dist =
+      get_mv_dist(mv_mode, cpi, &x->e_mbd, gf_picture, frame_idx, tpl_frame,
+                  rf_idx, bsize, mi_row, mi_col, mv);
+  const double rate = get_mv_cost(mv_mode);
+  return rd_cost(x->rdmult, x->rddiv, rate, dist);
+}
+
+// Evaluates every mode except NEW_MV_MODE for the block at (mi_row, mi_col)
+// and returns the one with the lowest cost. The winning cost and vector are
+// written to *rd and *mv; on equal cost the earlier mode is kept.
+static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
+                                 GF_PICTURE *gf_picture, int frame_idx,
+                                 TplDepFrame *tpl_frame, int rf_idx,
+                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                 double *rd, int_mv *mv) {
+  int best_mv_mode = ZERO_MV_MODE;
+  int have_best = 0;
+  int mode;
+  *rd = 0;
+  for (mode = 0; mode < MAX_MV_MODE; ++mode) {
+    double cand_rd;
+    int_mv cand_mv;
+    if (mode == NEW_MV_MODE) continue;
+
+    cand_rd = eval_mv_mode(mode, cpi, x, gf_picture, frame_idx, tpl_frame,
+                           rf_idx, bsize, mi_row, mi_col, &cand_mv);
+    // The first candidate is always taken; later ones must be strictly better.
+    if (!have_best || cand_rd < *rd) {
+      *rd = cand_rd;
+      *mv = cand_mv;
+      best_mv_mode = mode;
+      have_best = 1;
+    }
+  }
+  return best_mv_mode;
+}
+
+// Chooses between NEW_MV_MODE and the best reference mode for the block at
+// (mi_row, mi_col), writing the total cost of the chosen option to *rd.
+//
+// Two hypotheses are costed over a triangular window of kMvPreCheckLines
+// diagonals (at most kMvPreCheckSize blocks) that starts at this block and
+// extends right and down, skipping positions outside the frame:
+//   1. "no new mv": every block in the window takes its best reference mode.
+//   2. "new mv": this block takes NEW_MV_MODE and the remaining blocks take
+//      their best reference mode given that choice.
+// The modes and vectors of the cheaper hypothesis are left in
+// tpl_frame->mv_mode_arr[rf_idx] and cpi->select_mv_arr.
+static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x,
+                            GF_PICTURE *gf_picture, int frame_idx,
+                            TplDepFrame *tpl_frame, int rf_idx,
+                            BLOCK_SIZE bsize, int mi_row, int mi_col,
+                            double *rd) {
+  const int mi_height = num_8x8_blocks_high_lookup[bsize];
+  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+  int tmp_mv_mode_arr[kMvPreCheckSize];
+  int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx];
+  int_mv *select_mv_arr = cpi->select_mv_arr;
+  int_mv tmp_select_mv_arr[kMvPreCheckSize];
+  int stride = tpl_frame->stride;
+  double new_mv_rd = 0;
+  double no_new_mv_rd = 0;
+  int idx;
+  int tmp_idx;
+  // The temporary arrays hold exactly one triangle of kMvPreCheckLines
+  // diagonals, so every loop below must iterate over kMvPreCheckLines
+  // diagonals (not kMvPreCheckSize) to stay within them.
+  assert(kMvPreCheckSize ==
+         ((kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1));
+
+  // no new mv
+  // diagonal scan order
+  tmp_idx = 0;
+  for (idx = 0; idx < kMvPreCheckLines; ++idx) {
+    int r;
+    for (r = 0; r <= idx; ++r) {
+      int c = idx - r;
+      int nb_row = mi_row + r * mi_height;
+      int nb_col = mi_col + c * mi_width;
+      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
+        double this_rd;
+        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
+        mv_mode_arr[nb_row * stride + nb_col] =
+            find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame,
+                                  rf_idx, bsize, nb_row, nb_col, &this_rd, mv);
+        no_new_mv_rd += this_rd;
+        // Remember this hypothesis so it can be restored if it wins.
+        assert(tmp_idx < kMvPreCheckSize);
+        tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col];
+        tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col];
+        ++tmp_idx;
+      }
+    }
+  }
+
+  // new mv
+  mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE;
+  new_mv_rd = eval_mv_mode(NEW_MV_MODE, cpi, x, gf_picture, frame_idx,
+                           tpl_frame, rf_idx, bsize, mi_row, mi_col,
+                           &select_mv_arr[mi_row * stride + mi_col]);
+  // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE
+  // beforehand.
+  for (idx = 1; idx < kMvPreCheckLines; ++idx) {
+    int r;
+    for (r = 0; r <= idx; ++r) {
+      int c = idx - r;
+      int nb_row = mi_row + r * mi_height;
+      int nb_col = mi_col + c * mi_width;
+      if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
+        double this_rd;
+        int_mv *mv = &select_mv_arr[nb_row * stride + nb_col];
+        mv_mode_arr[nb_row * stride + nb_col] =
+            find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame,
+                                  rf_idx, bsize, nb_row, nb_col, &this_rd, mv);
+        new_mv_rd += this_rd;
+      }
+    }
+  }
+
+  // update best_mv_mode
+  tmp_idx = 0;
+  if (no_new_mv_rd < new_mv_rd) {
+    // The arrays currently hold the "new mv" hypothesis; put back the saved
+    // "no new mv" one, visiting positions in the same order they were saved.
+    *rd = no_new_mv_rd;
+    for (idx = 0; idx < kMvPreCheckLines; ++idx) {
+      int r;
+      for (r = 0; r <= idx; ++r) {
+        int c = idx - r;
+        int nb_row = mi_row + r * mi_height;
+        int nb_col = mi_col + c * mi_width;
+        if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) {
+          mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx];
+          select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx];
+          ++tmp_idx;
+        }
+      }
+    }
+  } else {
+    *rd = new_mv_rd;
+  }
+}
+
+// Runs predict_mv_mode() on every bsize block of the frame, visiting blocks
+// one anti-diagonal at a time starting from the top-left corner.
+void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x, GF_PICTURE *gf_picture,
+                         int frame_idx, TplDepFrame *tpl_frame, int rf_idx,
+                         BLOCK_SIZE bsize) {
+  const int mi_height = num_8x8_blocks_high_lookup[bsize];
+  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+  const int unit_rows = tpl_frame->mi_rows / mi_height;
+  const int unit_cols = tpl_frame->mi_cols / mi_width;
+  const int max_diagonal_lines = unit_rows + unit_cols - 1;
+  int diag;
+  for (diag = 0; diag < max_diagonal_lines; ++diag) {
+    // Rows of this diagonal whose matching column lies inside the frame.
+    const int first_row = VPXMAX(diag - unit_cols + 1, 0);
+    const int last_row = VPXMIN(diag, unit_rows - 1);
+    int r;
+    for (r = first_row; r <= last_row; ++r) {
+      double rd;  // TODO(angiebird): Use this information later.
+      const int c = diag - r;
+      const int mi_row = r * mi_height;
+      const int mi_col = c * mi_width;
+      assert(c >= 0 && c < unit_cols);
+      assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows);
+      assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols);
+      predict_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx, bsize,
+                      mi_row, mi_col, &rd);
+    }
+  }
+}
+
+// Computes a feature score for a rows x cols pixel block from the averaged
+// products of horizontal (x) and vertical (y) neighbor differences:
+//   score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001)
+// Differences are taken over the top-left (rows - 1) x (cols - 1) pixels.
+static double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows,
+                                int cols) {
+  const int num_samples = (rows - 1) * (cols - 1);
+  double sum_xx = 0;
+  double sum_xy = 0;
+  double sum_yy = 0;
+  int r, c;
+  vpx_clear_system_state();
+  for (r = 0; r + 1 < rows; ++r) {
+    const uint8_t *const cur_row = buf + r * stride;
+    const uint8_t *const next_row = buf + (r + 1) * stride;
+    for (c = 0; c + 1 < cols; ++c) {
+      const int dx = cur_row[c] - cur_row[c + 1];
+      const int dy = cur_row[c] - next_row[c];
+      sum_xx += dx * dx;
+      sum_xy += dx * dy;
+      sum_yy += dy * dy;
+    }
+  }
+  sum_xx /= num_samples;
+  sum_xy /= num_samples;
+  sum_yy /= num_samples;
+  return (sum_xx * sum_yy - sum_xy * sum_xy + 0.0001) /
+         (sum_xx + sum_yy + 0.0001);
+}
+
static int compare_feature_score(const void *a, const void *b) {
const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a;
const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b;
@@ -6460,6 +6759,10 @@ static void init_tpl_buffer(VP9_COMP *cpi) {
cpi->feature_score_loc_alloc = 1;
}
+ vpx_free(cpi->select_mv_arr);
+ CHECK_MEM_ERROR(
+ cm, cpi->select_mv_arr,
+ vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr)));
#endif
// TODO(jingning): Reduce the actual memory use for tpl model build up.
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index cb9ea2de2..a690ebc73 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -320,6 +320,7 @@ typedef struct TplDepFrame {
double mv_dist_sum[3];
double mv_cost_sum[3];
int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES];
+ int *mv_mode_arr[3];
#endif
} TplDepFrame;
@@ -590,6 +591,7 @@ typedef struct VP9_COMP {
FEATURE_SCORE_LOC *feature_score_loc_arr;
FEATURE_SCORE_LOC **feature_score_loc_sort;
FEATURE_SCORE_LOC **feature_score_loc_heap;
+ int_mv *select_mv_arr;
#endif
TileDataEnc *tile_data;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index 8f0da48a2..5cfffe6b5 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -549,7 +549,7 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) {
}
#define FP_DN_THRESH 8
-#define FP_MAX_DN_THRESH 16
+#define FP_MAX_DN_THRESH 24
#define KERNEL_SIZE 3
// Baseline Kernel weights for first pass noise metric
@@ -843,6 +843,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
double mb_intra_factor;
double mb_brightness_factor;
double mb_neutral_count;
+ int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH);
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
@@ -1254,7 +1255,6 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
}
}
#endif
-
// Does the row vector point inwards or outwards?
if (mb_row < cm->mb_rows / 2) {
if (mv.row > 0)
@@ -1280,14 +1280,13 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
else if (mv.col < 0)
--(fp_acc_data->sum_in_vectors);
}
- fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF;
- } else if (this_intra_error < scale_sse_threshold(cm, LOW_I_THRESH)) {
+ }
+ if (this_intra_error < scaled_low_intra_thresh) {
fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize);
- } else { // 0,0 mv but high error
+ } else {
fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF;
}
} else { // Intra < inter error
- int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH);
if (this_intra_error < scaled_low_intra_thresh) {
fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize);
if (this_motion_error < scaled_low_intra_thresh) {
@@ -2399,8 +2398,12 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
twopass->arnr_strength_adjustment = 0;
- if ((section_zeromv < 0.10) || (section_noise <= (SECTION_NOISE_DEF * 0.75)))
+ if (section_noise < 150) {
twopass->arnr_strength_adjustment -= 1;
+ if (section_noise < 75) twopass->arnr_strength_adjustment -= 1;
+ } else if (section_noise > 250)
+ twopass->arnr_strength_adjustment += 1;
+
if (section_zeromv > 0.50) twopass->arnr_strength_adjustment += 1;
}
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 534b15acc..602cc5798 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -29,11 +29,6 @@
// #define NEW_DIAMOND_SEARCH
-static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
- const MV *mv) {
- return &buf->buf[mv->row * buf->stride + mv->col];
-}
-
void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index 6bef88747..779e8d8e7 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -38,6 +38,11 @@ typedef struct search_site_config {
int total_steps;
} search_site_config;
+// Returns the address inside buf that lies mv->row rows and mv->col columns
+// away from buf->buf, using buf->stride as the row pitch.
+static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
+                                             const MV *mv) {
+  const int offset = mv->row * buf->stride + mv->col;
+  return buf->buf + offset;
+}
+
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride);
void vp9_init3smotion_compensation(search_site_config *cfg, int stride);
@@ -143,7 +148,6 @@ static INLINE MV get_full_mv(const MV *mv) {
out_mv.col = mv->col >> 3;
return out_mv;
}
-
struct TplDepFrame;
void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
int mi_col, int rf_idx, BLOCK_SIZE bsize,
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index a3240513f..0fdc61649 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1683,6 +1683,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
unsigned int sse_zeromv_normalized = UINT_MAX;
unsigned int best_sse_sofar = UINT_MAX;
int gf_temporal_ref = 0;
+ int force_test_gf_zeromv = 0;
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX;
@@ -1939,6 +1940,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
flag_svc_subpel = 1;
}
+ // For SVC with quality layers, when QP of lower layer is lower
+ // than current layer: force check of GF-ZEROMV before early exit
+ // due to skip flag.
+ if (svc->spatial_layer_id > 0 && usable_ref_frame == GOLDEN_FRAME &&
+ no_scaling && cm->base_qindex > svc->lower_layer_qindex + 10)
+ force_test_gf_zeromv = 1;
+
for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) {
int rate_mv = 0;
int mode_rd_thresh;
@@ -2349,11 +2357,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
}
- if (x->skip) break;
+ if (x->skip &&
+ (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME]))
+ break;
// If early termination flag is 1 and at least 2 modes are checked,
// the mode search is terminated.
- if (best_early_term && idx > 0 && !scene_change_detected) {
+ if (best_early_term && idx > 0 && !scene_change_detected &&
+ (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME])) {
x->skip = 1;
break;
}
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index c1a079ff0..c73b0ed87 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -3034,7 +3034,7 @@ static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x,
if (content_type == VP9E_CONTENT_FILM) {
if (src_rec_min <= VERY_LOW_VAR_THRESH) {
if (ref_frame == INTRA_FRAME) *this_rd *= 2;
- if (bsize > 6) *this_rd *= 2;
+ if (bsize > BLOCK_16X16) *this_rd *= 2;
}
}
}
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index 04b589ac3..ee5f0e56c 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -192,7 +192,8 @@ static INLINE int mod_index(int sum_dist, int index, int rounding, int strength,
assert(index >= 0 && index <= 13);
assert(index_mult[index] != 0);
- mod = (clamp(sum_dist, 0, UINT16_MAX) * index_mult[index]) >> 16;
+ mod =
+ ((unsigned int)clamp(sum_dist, 0, UINT16_MAX) * index_mult[index]) >> 16;
mod += rounding;
mod >>= strength;
@@ -680,7 +681,9 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
src_variance = vp9_get_sby_perpixel_variance(cpi, &src, TF_BLOCK);
#endif // CONFIG_VP9_HIGHBITDEPTH
- if (src_variance <= 2) strength = VPXMAX(0, (int)strength - 2);
+ if (src_variance <= 2) {
+ strength = VPXMAX(0, arnr_filter_data->strength - 2);
+ }
}
for (frame = 0; frame < frame_count; frame++) {
diff --git a/vp9/encoder/x86/temporal_filter_sse4.c b/vp9/encoder/x86/temporal_filter_sse4.c
index 18c4f02d5..b560e2218 100644
--- a/vp9/encoder/x86/temporal_filter_sse4.c
+++ b/vp9/encoder/x86/temporal_filter_sse4.c
@@ -967,8 +967,8 @@ static void vp9_apply_temporal_filter_chroma_8(
v_sum_row_2 = v_sum_row_3;
// Add chroma values
- u_sum_row = _mm_adds_epu8(u_sum_row_1, u_sum_row_2);
- v_sum_row = _mm_adds_epu8(v_sum_row_1, v_sum_row_2);
+ u_sum_row = _mm_adds_epu16(u_sum_row_1, u_sum_row_2);
+ v_sum_row = _mm_adds_epu16(v_sum_row_1, v_sum_row_2);
// Add luma values
add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row);
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 7ca4004b0..c9a55669e 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -64,9 +64,12 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
+ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
+endif # !CONFIG_VP9_HIGHBITDEPTH
+
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_VSX) += common/ppc/vp9_idct_vsx.c
VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index e16152a6d..67e5389a7 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -138,10 +138,13 @@ VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_frame_scale_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c
+
+ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h
+endif # !CONFIG_VP9_HIGHBITDEPTH
VP9_CX_SRCS-$(HAVE_VSX) += encoder/ppc/vp9_quantize_vsx.c