Diffstat (limited to 'vp9')
-rw-r--r--  vp9/common/vp9_mvref_common.c       |   2
-rw-r--r--  vp9/common/vp9_onyxc_int.h          |  15
-rw-r--r--  vp9/common/vp9_rtcd_defs.pl         |   9
-rw-r--r--  vp9/common/x86/vp9_copy_sse2.asm    |  12
-rw-r--r--  vp9/decoder/vp9_decodeframe.c       |   6
-rw-r--r--  vp9/decoder/vp9_decoder.c           |   2
-rw-r--r--  vp9/decoder/vp9_decoder.h           |   3
-rw-r--r--  vp9/decoder/vp9_dthread.c           |  40
-rw-r--r--  vp9/decoder/vp9_dthread.h           |   7
-rw-r--r--  vp9/encoder/vp9_aq_cyclicrefresh.c  |   7
-rw-r--r--  vp9/encoder/vp9_encodeframe.c       | 224
-rw-r--r--  vp9/encoder/vp9_encodemb.c          | 108
-rw-r--r--  vp9/encoder/vp9_firstpass.c         |  33
-rw-r--r--  vp9/encoder/vp9_mbgraph.c           |  36
-rw-r--r--  vp9/encoder/vp9_mcomp.c             | 174
-rw-r--r--  vp9/encoder/vp9_mcomp.h             |   4
-rw-r--r--  vp9/encoder/vp9_onyx_if.c           | 248
-rw-r--r--  vp9/encoder/vp9_onyx_int.h          |  66
-rw-r--r--  vp9/encoder/vp9_picklpf.c           |  19
-rw-r--r--  vp9/encoder/vp9_pickmode.c          |   4
-rw-r--r--  vp9/encoder/vp9_ratectrl.c          | 177
-rw-r--r--  vp9/encoder/vp9_ratectrl.h          |   5
-rw-r--r--  vp9/encoder/vp9_rdopt.c             | 257
-rw-r--r--  vp9/encoder/vp9_speed_features.c    |  70
-rw-r--r--  vp9/encoder/vp9_speed_features.h    |   2
-rw-r--r--  vp9/encoder/vp9_svc_layercontext.c  |   2
-rw-r--r--  vp9/encoder/vp9_variance.h          |   8
-rw-r--r--  vp9/encoder/x86/vp9_mcomp_x86.h     |  48
-rw-r--r--  vp9/vp9_cx_iface.c                  |  14
-rw-r--r--  vp9/vp9cx.mk                        |   2
30 files changed, 731 insertions(+), 873 deletions(-)
diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c
index 1aab36205..61682c42d 100644
--- a/vp9/common/vp9_mvref_common.c
+++ b/vp9/common/vp9_mvref_common.c
@@ -195,7 +195,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
int block, int mi_row, int mi_col) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
- const MODE_INFO *prev_mi = cm->prev_mi
+ const MODE_INFO *prev_mi = cm->coding_use_prev_mi && cm->prev_mi
? cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]
: NULL;
const MB_MODE_INFO *const prev_mbmi = prev_mi ? &prev_mi->mbmi : NULL;
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index fe9cc9e6a..20de43414 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -120,7 +120,6 @@ typedef struct VP9Common {
// frame header, 3 reset all contexts.
int reset_frame_context;
- int frame_flags;
// MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
// MODE_INFO (8-pixel) units.
int MBs;
@@ -284,15 +283,15 @@ static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile,
xd->left_available = (mi_col > tile->mi_col_start);
}
-static INLINE MODE_INFO *get_prev_mi(VP9_COMMON *cm) {
- const int use_prev_mi = cm->coding_use_prev_mi &&
- cm->width == cm->last_width &&
- cm->height == cm->last_height &&
- !cm->intra_only &&
- cm->last_show_frame;
+static INLINE void set_prev_mi(VP9_COMMON *cm) {
+ const int use_prev_in_find_mv_refs = cm->width == cm->last_width &&
+ cm->height == cm->last_height &&
+ !cm->intra_only &&
+ cm->last_show_frame;
// Special case: set prev_mi to NULL when the previous mode info
// context cannot be used.
- return use_prev_mi ? &cm->prev_mip[cm->mi_stride + 1] : NULL;
+ cm->prev_mi = use_prev_in_find_mv_refs ?
+ cm->prev_mip + cm->mi_stride + 1 : NULL;
}
static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) {
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index b45559245..8a8155410 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -13,7 +13,6 @@ struct macroblockd;
struct macroblock;
struct vp9_variance_vtable;
-#define DEC_MVCOSTS int *mvjcost, int *mvcost[2]
struct mv;
union int_mv;
struct yv12_buffer_config;
@@ -758,20 +757,20 @@ specialize qw/vp9_fdct32x32_rd sse2 avx2/;
#
# Motion search
#
-add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv, struct mv *best_mv";
+add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
specialize qw/vp9_full_search_sad sse3 sse4_1/;
$vp9_full_search_sad_sse3=vp9_full_search_sadx3;
$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
-add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+add_proto qw/int vp9_refining_search_sad/, "const struct macroblock *x, struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_refining_search_sad sse3/;
$vp9_refining_search_sad_sse3=vp9_refining_search_sadx4;
-add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad sse3/;
$vp9_diamond_search_sad_sse3=vp9_diamond_search_sadx4;
-add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, DEC_MVCOSTS, const struct mv *center_mv";
+add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_full_range_search/;
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
diff --git a/vp9/common/x86/vp9_copy_sse2.asm b/vp9/common/x86/vp9_copy_sse2.asm
index dd522c698..b26383708 100644
--- a/vp9/common/x86/vp9_copy_sse2.asm
+++ b/vp9/common/x86/vp9_copy_sse2.asm
@@ -133,10 +133,14 @@ INIT_MMX sse
movh m3, [srcq+r5q]
lea srcq, [srcq+src_strideq*4]
%ifidn %1, avg
- pavgb m0, [dstq]
- pavgb m1, [dstq+dst_strideq]
- pavgb m2, [dstq+dst_strideq*2]
- pavgb m3, [dstq+r6q]
+ movh m4, [dstq]
+ movh m5, [dstq+dst_strideq]
+ movh m6, [dstq+dst_strideq*2]
+ movh m7, [dstq+r6q]
+ pavgb m0, m4
+ pavgb m1, m5
+ pavgb m2, m6
+ pavgb m3, m7
%endif
movh [dstq ], m0
movh [dstq+dst_strideq ], m1
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 9b63961f0..022a4296f 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1296,7 +1296,11 @@ int vp9_decode_frame(VP9Decoder *pbi,
}
init_macroblockd(cm, &pbi->mb);
- cm->prev_mi = get_prev_mi(cm);
+
+ if (cm->coding_use_prev_mi)
+ set_prev_mi(cm);
+ else
+ cm->prev_mi = NULL;
setup_plane_dequants(cm, xd, cm->base_qindex);
vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index fd74478e9..56dbc99ed 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -361,7 +361,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
// If multiple threads are used to decode tiles, then we use those threads
// to do parallel loopfiltering.
if (pbi->num_tile_workers) {
- vp9_loop_filter_frame_mt(pbi, cm, &pbi->mb, cm->lf.filter_level, 0, 0);
+ vp9_loop_filter_frame_mt(pbi, cm, cm->lf.filter_level, 0, 0);
} else {
vp9_loop_filter_frame(cm, &pbi->mb, cm->lf.filter_level, 0, 0);
}
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index c9dc25191..b8250c2bb 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -49,9 +49,6 @@ typedef struct VP9Decoder {
int decoded_key_frame;
- int initial_width;
- int initial_height;
-
int do_loopfilter_inline; // apply loopfilter to available rows immediately
VP9Worker lf_worker;
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 9b124c9d9..9098063ce 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -40,13 +40,13 @@ static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) {
const int nsync = lf_sync->sync_range;
if (r && !(c & (nsync - 1))) {
- mutex_lock(&lf_sync->mutex_[r - 1]);
+ pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
+ mutex_lock(mutex);
while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
- pthread_cond_wait(&lf_sync->cond_[r - 1],
- &lf_sync->mutex_[r - 1]);
+ pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
}
- pthread_mutex_unlock(&lf_sync->mutex_[r - 1]);
+ pthread_mutex_unlock(mutex);
}
#else
(void)lf_sync;
@@ -94,21 +94,21 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
VP9LfSync *const lf_sync, int num_lf_workers) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int r, c; // SB row and col
- LOOP_FILTER_MASK lfm;
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
for (r = start; r < stop; r += num_lf_workers) {
const int mi_row = r << MI_BLOCK_SIZE_LOG2;
- MODE_INFO **mi_8x8 = cm->mi_grid_visible + mi_row * cm->mi_stride;
+ MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (c = 0; c < sb_cols; ++c) {
const int mi_col = c << MI_BLOCK_SIZE_LOG2;
+ LOOP_FILTER_MASK lfm;
int plane;
sync_read(lf_sync, r, c);
vp9_setup_dst_planes(xd, frame_buffer, mi_row, mi_col);
- vp9_setup_mask(cm, mi_row, mi_col, mi_8x8 + mi_col, cm->mi_stride, &lfm);
+ vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane) {
vp9_filter_block_plane(cm, &xd->plane[plane], mi_row, &lfm);
@@ -134,9 +134,9 @@ static int loop_filter_row_worker(void *arg1, void *arg2) {
// threads.
void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
VP9_COMMON *cm,
- MACROBLOCKD *xd,
int frame_filter_level,
int y_only, int partial_frame) {
+ VP9LfSync *const lf_sync = &pbi->lf_row_sync;
// Number of superblock rows and cols
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -146,8 +146,6 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
// Allocate memory used in thread synchronization.
// This always needs to be done even if frame_filter_level is 0.
if (!cm->current_video_frame || cm->last_height != cm->height) {
- VP9LfSync *const lf_sync = &pbi->lf_row_sync;
-
if (cm->last_height != cm->height) {
const int aligned_last_height =
ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2);
@@ -166,8 +164,7 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
vp9_loop_filter_frame_init(cm, frame_filter_level);
// Initialize cur_sb_col to -1 for all SB rows.
- vpx_memset(pbi->lf_row_sync.cur_sb_col, -1,
- sizeof(*pbi->lf_row_sync.cur_sb_col) * sb_rows);
+ vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
// Set up loopfilter thread data.
// The decoder is using num_workers instead of pbi->num_tile_workers
@@ -194,7 +191,7 @@ void vp9_loop_filter_frame_mt(VP9Decoder *pbi,
lf_data->stop = sb_rows;
lf_data->y_only = y_only; // always do all planes in decoder
- lf_data->lf_sync = &pbi->lf_row_sync;
+ lf_data->lf_sync = lf_sync;
lf_data->num_lf_workers = num_workers;
// Start loopfiltering
@@ -253,8 +250,12 @@ void vp9_loop_filter_alloc(VP9_COMMON *cm, VP9LfSync *lf_sync, int rows,
// Deallocate lf synchronization related mutex and data
void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
-#if CONFIG_MULTITHREAD
+#if !CONFIG_MULTITHREAD
+ (void)rows;
+#endif // !CONFIG_MULTITHREAD
+
if (lf_sync != NULL) {
+#if CONFIG_MULTITHREAD
int i;
if (lf_sync->mutex_ != NULL) {
@@ -269,17 +270,10 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows) {
}
vpx_free(lf_sync->cond_);
}
-
+#endif // CONFIG_MULTITHREAD
vpx_free(lf_sync->cur_sb_col);
// clear the structure as the source of this call may be a resize in which
// case this call will be followed by an _alloc() which may fail.
- vpx_memset(lf_sync, 0, sizeof(*lf_sync));
+ vp9_zero(*lf_sync);
}
-#else
- (void)rows;
- if (lf_sync != NULL) {
- vpx_free(lf_sync->cur_sb_col);
- vpx_memset(lf_sync, 0, sizeof(*lf_sync));
- }
-#endif // CONFIG_MULTITHREAD
}
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
index 005bd7bbd..8738ceebd 100644
--- a/vp9/decoder/vp9_dthread.h
+++ b/vp9/decoder/vp9_dthread.h
@@ -12,11 +12,9 @@
#define VP9_DECODER_VP9_DTHREAD_H_
#include "./vpx_config.h"
-#include "vp9/common/vp9_loopfilter.h"
#include "vp9/decoder/vp9_reader.h"
#include "vp9/decoder/vp9_thread.h"
-struct macroblockd;
struct VP9Common;
struct VP9Decoder;
@@ -43,16 +41,15 @@ typedef struct VP9LfSyncData {
} VP9LfSync;
// Allocate memory for loopfilter row synchronization.
-void vp9_loop_filter_alloc(struct VP9Common *cm, struct VP9LfSyncData *lf_sync,
+void vp9_loop_filter_alloc(struct VP9Common *cm, VP9LfSync *lf_sync,
int rows, int width);
// Deallocate loopfilter synchronization related mutex and data.
-void vp9_loop_filter_dealloc(struct VP9LfSyncData *lf_sync, int rows);
+void vp9_loop_filter_dealloc(VP9LfSync *lf_sync, int rows);
// Multi-threaded loopfilter that uses the tile threads.
void vp9_loop_filter_frame_mt(struct VP9Decoder *pbi,
struct VP9Common *cm,
- struct macroblockd *xd,
int frame_filter_level,
int y_only, int partial_frame);
diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c
index 787909142..e55881ffc 100644
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -200,6 +200,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
// Rate target ratio to set q delta.
const float rate_ratio_qdelta = 2.0;
+ const double q = vp9_convert_qindex_to_q(cm->base_qindex);
vp9_clear_system_state();
// Some of these parameters may be set via codec-control function later.
cr->max_sbs_perframe = 10;
@@ -209,14 +210,12 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
// Set rate threshold to some fraction of target (and scaled by 256).
cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 2;
// Distortion threshold, quadratic in Q, scale factor to be adjusted.
- cr->thresh_dist_sb = 8 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
- vp9_convert_qindex_to_q(cm->base_qindex));
+ cr->thresh_dist_sb = 8 * (int)(q * q);
if (cpi->sf.use_nonrd_pick_mode) {
// May want to be more conservative with thresholds in non-rd mode for now
// as rate/distortion are derived from model based on prediction residual.
cr->thresh_rate_sb = (rc->sb64_target_rate * 256) >> 3;
- cr->thresh_dist_sb = 4 * (int)(vp9_convert_qindex_to_q(cm->base_qindex) *
- vp9_convert_qindex_to_q(cm->base_qindex));
+ cr->thresh_dist_sb = 4 * (int)(q * q);
}
cr->num_seg_blocks = 0;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 61a5022ec..70b70fec2 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -103,34 +103,31 @@ static const uint8_t VP9_VAR_OFFS[64] = {
};
static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi,
- MACROBLOCK *x,
+ const struct buf_2d *ref,
BLOCK_SIZE bs) {
- unsigned int var, sse;
- var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride,
- VP9_VAR_OFFS, 0, &sse);
+ unsigned int sse;
+ const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
+ VP9_VAR_OFFS, 0, &sse);
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
- MACROBLOCK *x,
- int mi_row,
- int mi_col,
+ const struct buf_2d *ref,
+ int mi_row, int mi_col,
BLOCK_SIZE bs) {
- const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
- int offset = (mi_row * MI_SIZE) * yv12->y_stride + (mi_col * MI_SIZE);
- unsigned int var, sse;
- var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf,
- x->plane[0].src.stride,
- yv12->y_buffer + offset,
- yv12->y_stride,
- &sse);
+ const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
+ const uint8_t* last_y = &last->y_buffer[mi_row * MI_SIZE * last->y_stride +
+ mi_col * MI_SIZE];
+ unsigned int sse;
+ const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
+ last_y, last->y_stride, &sse);
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi,
int mi_row,
int mi_col) {
- unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb,
+ unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src,
mi_row, mi_col,
BLOCK_64X64);
if (var < 8)
@@ -146,7 +143,7 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi,
static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi,
int mi_row,
int mi_col) {
- unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb,
+ unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src,
mi_row, mi_col,
BLOCK_64X64);
if (var < 4)
@@ -246,8 +243,8 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
// R/D setup.
- x->rddiv = cpi->RDDIV;
- x->rdmult = cpi->RDMULT;
+ x->rddiv = cpi->rd.RDDIV;
+ x->rdmult = cpi->rd.RDMULT;
// Setup segment ID.
if (seg->enabled) {
@@ -822,6 +819,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int output_enabled) {
int i, x_idx, y;
VP9_COMMON *const cm = &cpi->common;
+ RD_OPT *const rd_opt = &cpi->rd;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = x->plane;
@@ -907,7 +905,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
for (i = 0; i < TX_MODES; i++)
- cpi->rd_tx_select_diff[i] += ctx->tx_rd_diff[i];
+ rd_opt->tx_select_diff[i] += ctx->tx_rd_diff[i];
}
#if CONFIG_INTERNAL_STATS
@@ -940,12 +938,12 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
}
}
- cpi->rd_comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
- cpi->rd_comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
- cpi->rd_comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
+ rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
+ rd_opt->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
+ rd_opt->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- cpi->rd_filter_diff[i] += ctx->best_filter_diff[i];
+ rd_opt->filter_diff[i] += ctx->best_filter_diff[i];
}
}
@@ -1013,12 +1011,16 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
// Set to zero to make sure we do not use the previous encoded frame stats
mbmi->skip = 0;
- x->source_variance = get_sby_perpixel_variance(cpi, x, bsize);
+ x->source_variance = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+
+ // Save rdmult before it might be changed, so it can be restored later.
+ orig_rdmult = x->rdmult;
+ if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
+ activity_masking(cpi, x);
if (aq_mode == VARIANCE_AQ) {
const int energy = bsize <= BLOCK_16X16 ? x->mb_energy
: vp9_block_energy(cpi, x, bsize);
-
if (cm->frame_type == KEY_FRAME ||
cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
@@ -1031,14 +1033,6 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
rdmult_ratio = vp9_vaq_rdmult_ratio(energy);
vp9_init_plane_quantizers(cpi, x);
- }
-
- // Save rdmult before it might be changed, so it can be restored later.
- orig_rdmult = x->rdmult;
- if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
- activity_masking(cpi, x);
-
- if (aq_mode == VARIANCE_AQ) {
vp9_clear_system_state();
x->rdmult = (int)round(x->rdmult * rdmult_ratio);
} else if (aq_mode == COMPLEXITY_AQ) {
@@ -1070,14 +1064,11 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
totaldist, bsize, ctx, best_rd);
}
- if (aq_mode == VARIANCE_AQ) {
- x->rdmult = orig_rdmult;
- if (*totalrate != INT_MAX) {
- vp9_clear_system_state();
- *totalrate = (int)round(*totalrate * rdmult_ratio);
- }
- } else if (aq_mode == COMPLEXITY_AQ || aq_mode == CYCLIC_REFRESH_AQ) {
- x->rdmult = orig_rdmult;
+ x->rdmult = orig_rdmult;
+
+ if (aq_mode == VARIANCE_AQ && *totalrate != INT_MAX) {
+ vp9_clear_system_state();
+ *totalrate = (int)round(*totalrate * rdmult_ratio);
}
}
@@ -1364,6 +1355,25 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
}
+static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
+ MODE_INFO **prev_mi_8x8) {
+ const int mis = cm->mi_stride;
+ int block_row, block_col;
+
+ for (block_row = 0; block_row < 8; ++block_row) {
+ for (block_col = 0; block_col < 8; ++block_col) {
+ MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col];
+ const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
+
+ if (prev_mi) {
+ const ptrdiff_t offset = prev_mi - cm->prev_mi;
+ mi_8x8[block_row * mis + block_col] = cm->mi + offset;
+ mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
+ }
+ }
+ }
+}
+
static void constrain_copy_partitioning(VP9_COMP *const cpi,
const TileInfo *const tile,
MODE_INFO **mi_8x8,
@@ -1413,38 +1423,10 @@ static void constrain_copy_partitioning(VP9_COMP *const cpi,
}
} else {
// Else this is a partial SB64, copy previous partition.
- for (block_row = 0; block_row < 8; ++block_row) {
- for (block_col = 0; block_col < 8; ++block_col) {
- MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col];
- const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
- if (prev_mi) {
- const ptrdiff_t offset = prev_mi - cm->prev_mi;
- mi_8x8[block_row * mis + block_col] = cm->mi + offset;
- mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
- }
- }
- }
+ copy_partitioning(cm, mi_8x8, prev_mi_8x8);
}
}
-static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
- MODE_INFO **prev_mi_8x8) {
- const int mis = cm->mi_stride;
- int block_row, block_col;
-
- for (block_row = 0; block_row < 8; ++block_row) {
- for (block_col = 0; block_col < 8; ++block_col) {
- MODE_INFO *const prev_mi = prev_mi_8x8[block_row * mis + block_col];
- const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
-
- if (prev_mi) {
- const ptrdiff_t offset = prev_mi - cm->prev_mi;
- mi_8x8[block_row * mis + block_col] = cm->mi + offset;
- mi_8x8[block_row * mis + block_col]->mbmi.sb_type = sb_type;
- }
- }
- }
-}
const struct {
int row;
@@ -1465,13 +1447,14 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
MODE_INFO **mi_8x8,
int mi_row, int mi_col) {
VP9_COMMON *const cm = &cpi->common;
- MACROBLOCK *x = &cpi->mb;
+ MACROBLOCK *const x = &cpi->mb;
const int mis = cm->mi_stride;
- int row8x8_remaining = tile->mi_row_end - mi_row;
- int col8x8_remaining = tile->mi_col_end - mi_col;
- int r, c;
+ const int row8x8_remaining = tile->mi_row_end - mi_row;
+ const int col8x8_remaining = tile->mi_col_end - mi_col;
MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
+ vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+
assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
// In-image SB64
@@ -1483,8 +1466,8 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
const int pre_offset = (mi_row * MI_SIZE) * pre_stride +
(mi_col * MI_SIZE);
const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset;
- const int thr_32x32 = cpi->sf.source_var_thresh;
- const int thr_64x64 = thr_32x32 << 1;
+ const unsigned int thr_32x32 = cpi->sf.source_var_thresh;
+ const unsigned int thr_64x64 = thr_32x32 << 1;
int i, j;
int index;
diff d32[4];
@@ -1550,16 +1533,13 @@ static void set_source_var_based_partition(VP9_COMP *cpi,
BLOCK_SIZE bsize = BLOCK_16X16;
int bh = num_8x8_blocks_high_lookup[bsize];
int bw = num_8x8_blocks_wide_lookup[bsize];
-
+ int r, c;
for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
- int index = r * mis + c;
- // Find a partition size that fits
- bsize = find_partition_size(bsize,
- (row8x8_remaining - r),
- (col8x8_remaining - c), &bh, &bw);
+ const int index = r * mis + c;
mi_8x8[index] = mi_upper_left + index;
- mi_8x8[index]->mbmi.sb_type = bsize;
+ mi_8x8[index]->mbmi.sb_type = find_partition_size(bsize,
+ row8x8_remaining - r, col8x8_remaining - c, &bh, &bw);
}
}
}
@@ -2234,7 +2214,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (cpi->sf.disable_split_var_thresh && partition_none_allowed) {
unsigned int source_variancey;
vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
- source_variancey = get_sby_perpixel_variance(cpi, x, bsize);
+ source_variancey = get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
if (source_variancey < cpi->sf.disable_split_var_thresh) {
do_split = 0;
if (source_variancey < cpi->sf.disable_split_var_thresh / 2)
@@ -2668,9 +2648,10 @@ static TX_MODE select_tx_mode(const VP9_COMP *cpi) {
if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
return ALLOW_32X32;
} else if (cpi->sf.tx_size_search_method == USE_FULL_RD) {
+ const RD_OPT *const rd_opt = &cpi->rd;
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
- return cpi->rd_tx_select_threshes[frame_type][ALLOW_32X32] >
- cpi->rd_tx_select_threshes[frame_type][TX_MODE_SELECT] ?
+ return rd_opt->tx_select_threshes[frame_type][ALLOW_32X32] >
+ rd_opt->tx_select_threshes[frame_type][TX_MODE_SELECT] ?
ALLOW_32X32 : TX_MODE_SELECT;
} else {
unsigned int total = 0;
@@ -3201,7 +3182,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
1, &dummy_rate, &dummy_dist);
break;
case SOURCE_VAR_BASED_PARTITION:
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
1, &dummy_rate, &dummy_dist);
@@ -3232,8 +3212,23 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
}
// end RTC play code
+static int get_skip_encode_frame(const VP9_COMMON *cm) {
+ unsigned int intra_count = 0, inter_count = 0;
+ int j;
+
+ for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
+ intra_count += cm->counts.intra_inter[j][0];
+ inter_count += cm->counts.intra_inter[j][1];
+ }
+
+ return (intra_count << 2) < inter_count &&
+ cm->frame_type != KEY_FRAME &&
+ cm->show_frame;
+}
+
static void encode_frame_internal(VP9_COMP *cpi) {
SPEED_FEATURES *const sf = &cpi->sf;
+ RD_OPT *const rd_opt = &cpi->rd;
MACROBLOCK *const x = &cpi->mb;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -3244,10 +3239,10 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_zero(cm->counts);
vp9_zero(cpi->coef_counts);
vp9_zero(cpi->tx_stepdown_count);
- vp9_zero(cpi->rd_comp_pred_diff);
- vp9_zero(cpi->rd_filter_diff);
- vp9_zero(cpi->rd_tx_select_diff);
- vp9_zero(cpi->rd_tx_select_threshes);
+ vp9_zero(rd_opt->comp_pred_diff);
+ vp9_zero(rd_opt->filter_diff);
+ vp9_zero(rd_opt->tx_select_diff);
+ vp9_zero(rd_opt->tx_select_threshes);
cm->tx_mode = select_tx_mode(cpi);
@@ -3266,7 +3261,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
build_activity_map(cpi);
- cm->prev_mi = get_prev_mi(cm);
+ set_prev_mi(cm);
if (sf->use_nonrd_pick_mode) {
// Initialize internal buffer pointers for rtc coding, where non-RD
@@ -3284,9 +3279,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
}
vp9_zero(x->zcoeff_blk);
- if (cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION &&
+ if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION &&
cm->current_video_frame > 0) {
- int check_freq = cpi->sf.search_type_check_frequency;
+ int check_freq = sf->search_type_check_frequency;
if ((cm->current_video_frame - 1) % check_freq == 0) {
cpi->use_large_partition_rate = 0;
@@ -3303,7 +3298,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
if ((cm->current_video_frame - 1) % check_freq >= 1) {
if (cpi->use_large_partition_rate < 15)
- cpi->sf.partition_search_type = FIXED_PARTITION;
+ sf->partition_search_type = FIXED_PARTITION;
}
}
}
@@ -3344,19 +3339,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
}
- if (sf->skip_encode_sb) {
- int j;
- unsigned int intra_count = 0, inter_count = 0;
- for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) {
- intra_count += cm->counts.intra_inter[j][0];
- inter_count += cm->counts.intra_inter[j][1];
- }
- sf->skip_encode_frame = (intra_count << 2) < inter_count &&
- cm->frame_type != KEY_FRAME &&
- cm->show_frame;
- } else {
- sf->skip_encode_frame = 0;
- }
+ sf->skip_encode_frame = sf->skip_encode_sb ? get_skip_encode_frame(cm) : 0;
#if 0
// Keep record of the total distortion this time around for future use
@@ -3366,6 +3349,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+ RD_OPT *const rd_opt = &cpi->rd;
// In the longer term the encoder should be generalized to match the
// decoder such that we allow compound where one of the 3 buffers has a
@@ -3398,8 +3382,8 @@ void vp9_encode_frame(VP9_COMP *cpi) {
// that for subsequent frames.
// It does the same analysis for transform size selection also.
const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
- const int64_t *mode_thresh = cpi->rd_prediction_type_threshes[frame_type];
- const int64_t *filter_thresh = cpi->rd_filter_threshes[frame_type];
+ const int64_t *mode_thresh = rd_opt->prediction_type_threshes[frame_type];
+ const int64_t *filter_thresh = rd_opt->filter_threshes[frame_type];
/* prediction (compound, single or hybrid) mode selection */
if (frame_type == ALTREF_FRAME || !cm->allow_comp_inter_inter)
@@ -3432,25 +3416,25 @@ void vp9_encode_frame(VP9_COMP *cpi) {
encode_frame_internal(cpi);
for (i = 0; i < REFERENCE_MODES; ++i) {
- const int diff = (int) (cpi->rd_comp_pred_diff[i] / cm->MBs);
- cpi->rd_prediction_type_threshes[frame_type][i] += diff;
- cpi->rd_prediction_type_threshes[frame_type][i] >>= 1;
+ const int diff = (int) (rd_opt->comp_pred_diff[i] / cm->MBs);
+ rd_opt->prediction_type_threshes[frame_type][i] += diff;
+ rd_opt->prediction_type_threshes[frame_type][i] >>= 1;
}
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
- const int64_t diff = cpi->rd_filter_diff[i] / cm->MBs;
- cpi->rd_filter_threshes[frame_type][i] =
- (cpi->rd_filter_threshes[frame_type][i] + diff) / 2;
+ const int64_t diff = rd_opt->filter_diff[i] / cm->MBs;
+ rd_opt->filter_threshes[frame_type][i] =
+ (rd_opt->filter_threshes[frame_type][i] + diff) / 2;
}
for (i = 0; i < TX_MODES; ++i) {
- int64_t pd = cpi->rd_tx_select_diff[i];
+ int64_t pd = rd_opt->tx_select_diff[i];
int diff;
if (i == TX_MODE_SELECT)
pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZES - 1), 0);
diff = (int) (pd / cm->MBs);
- cpi->rd_tx_select_threshes[frame_type][i] += diff;
- cpi->rd_tx_select_threshes[frame_type][i] /= 2;
+ rd_opt->tx_select_threshes[frame_type][i] += diff;
+ rd_opt->tx_select_threshes[frame_type][i] /= 2;
}
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 5e98e4e3f..b0c014eef 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -63,24 +63,17 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
}
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
-typedef struct vp9_token_state vp9_token_state;
-struct vp9_token_state {
+typedef struct vp9_token_state {
int rate;
int error;
int next;
signed char token;
short qc;
-};
+} vp9_token_state;
// TODO(jimbankoski): experiment to find optimal RD numbers.
-#define Y1_RD_MULT 4
-#define UV_RD_MULT 2
-
-static const int plane_rd_mult[4] = {
- Y1_RD_MULT,
- UV_RD_MULT,
-};
+static const int plane_rd_mult[PLANE_TYPES] = { 4, 2 };
#define UPDATE_RD_COST()\
{\
@@ -105,60 +98,56 @@ static int trellis_get_coeff_context(const int16_t *scan,
return pt;
}
-static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size, MACROBLOCK *mb,
- ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) {
+static int optimize_b(MACROBLOCK *mb, int plane, int block,
+ BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx) {
MACROBLOCKD *const xd = &mb->e_mbd;
- struct macroblock_plane *p = &mb->plane[plane];
- struct macroblockd_plane *pd = &xd->plane[plane];
+ struct macroblock_plane *const p = &mb->plane[plane];
+ struct macroblockd_plane *const pd = &xd->plane[plane];
const int ref = is_inter_block(&xd->mi[0]->mbmi);
vp9_token_state tokens[1025][2];
unsigned best_index[1025][2];
- const int16_t *coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
- int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
- int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- int eob = p->eobs[block], final_eob, sz = 0;
- const int i0 = 0;
- int rc, x, next, i;
- int64_t rdmult, rddiv, rd_cost0, rd_cost1;
- int rate0, rate1, error0, error1, t0, t1;
- int best, band, pt;
- PLANE_TYPE type = pd->plane_type;
- int err_mult = plane_rd_mult[type];
+ uint8_t token_cache[1024];
+ const int16_t *const coeff = BLOCK_OFFSET(mb->plane[plane].coeff, block);
+ int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
+ int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
+ const int eob = p->eobs[block];
+ const PLANE_TYPE type = pd->plane_type;
const int default_eob = 16 << (tx_size << 1);
const int mul = 1 + (tx_size == TX_32X32);
- uint8_t token_cache[1024];
const int16_t *dequant_ptr = pd->dequant;
const uint8_t *const band_translate = get_band_translate(tx_size);
- const scan_order *so = get_scan(xd, tx_size, type, block);
- const int16_t *scan = so->scan;
- const int16_t *nb = so->neighbors;
+ const scan_order *const so = get_scan(xd, tx_size, type, block);
+ const int16_t *const scan = so->scan;
+ const int16_t *const nb = so->neighbors;
+ int next = eob, sz = 0;
+ int64_t rdmult = mb->rdmult * plane_rd_mult[type], rddiv = mb->rddiv;
+ int64_t rd_cost0, rd_cost1;
+ int rate0, rate1, error0, error1, t0, t1;
+ int best, band, pt, i, final_eob;
assert((!type && !plane) || (type && plane));
assert(eob <= default_eob);
/* Now set up a Viterbi trellis to evaluate alternative roundings. */
- rdmult = mb->rdmult * err_mult;
- if (!is_inter_block(&mb->e_mbd.mi[0]->mbmi))
+ if (!ref)
rdmult = (rdmult * 9) >> 4;
- rddiv = mb->rddiv;
+
/* Initialize the sentinel node of the trellis. */
tokens[eob][0].rate = 0;
tokens[eob][0].error = 0;
tokens[eob][0].next = default_eob;
tokens[eob][0].token = EOB_TOKEN;
tokens[eob][0].qc = 0;
- *(tokens[eob] + 1) = *(tokens[eob] + 0);
- next = eob;
+ tokens[eob][1] = tokens[eob][0];
+
for (i = 0; i < eob; i++)
- token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
- qcoeff[scan[i]]].token];
+ token_cache[scan[i]] =
+ vp9_pt_energy_class[vp9_dct_value_tokens_ptr[qcoeff[scan[i]]].token];
- for (i = eob; i-- > i0;) {
+ for (i = eob; i-- > 0;) {
int base_bits, d2, dx;
-
- rc = scan[i];
- x = qcoeff[rc];
+ const int rc = scan[i];
+ int x = qcoeff[rc];
/* Only add a trellis state for non-zero coefficients. */
if (x) {
int shortcut = 0;
@@ -172,17 +161,15 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
if (next < default_eob) {
band = band_translate[i + 1];
pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
- rate0 +=
- mb->token_costs[tx_size][type][ref][band][0][pt]
- [tokens[next][0].token];
- rate1 +=
- mb->token_costs[tx_size][type][ref][band][0][pt]
- [tokens[next][1].token];
+ rate0 += mb->token_costs[tx_size][type][ref][band][0][pt]
+ [tokens[next][0].token];
+ rate1 += mb->token_costs[tx_size][type][ref][band][0][pt]
+ [tokens[next][1].token];
}
UPDATE_RD_COST();
/* And pick the best. */
best = rd_cost1 < rd_cost0;
- base_bits = *(vp9_dct_value_cost_ptr + x);
+ base_bits = vp9_dct_value_cost_ptr[x];
dx = mul * (dqcoeff[rc] - coeff[rc]);
d2 = dx * dx;
tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
@@ -196,9 +183,9 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
- if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
- (abs(x)*dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
- dequant_ptr[rc != 0]))
+ if ((abs(x) * dequant_ptr[rc != 0] > abs(coeff[rc]) * mul) &&
+ (abs(x) * dequant_ptr[rc != 0] < abs(coeff[rc]) * mul +
+ dequant_ptr[rc != 0]))
shortcut = 1;
else
shortcut = 0;
@@ -235,7 +222,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
UPDATE_RD_COST();
/* And pick the best. */
best = rd_cost1 < rd_cost0;
- base_bits = *(vp9_dct_value_cost_ptr + x);
+ base_bits = vp9_dct_value_cost_ptr[x];
if (shortcut) {
dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
@@ -274,26 +261,26 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
/* Now pick the best path through the whole trellis. */
band = band_translate[i + 1];
- pt = combine_entropy_contexts(*a, *l);
rate0 = tokens[next][0].rate;
rate1 = tokens[next][1].rate;
error0 = tokens[next][0].error;
error1 = tokens[next][1].error;
t0 = tokens[next][0].token;
t1 = tokens[next][1].token;
- rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
- rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
+ rate0 += mb->token_costs[tx_size][type][ref][band][0][ctx][t0];
+ rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
UPDATE_RD_COST();
best = rd_cost1 < rd_cost0;
- final_eob = i0 - 1;
+ final_eob = -1;
vpx_memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
vpx_memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
for (i = next; i < eob; i = next) {
- x = tokens[i][best].qc;
+ const int x = tokens[i][best].qc;
+ const int rc = scan[i];
if (x) {
final_eob = i;
}
- rc = scan[i];
+
qcoeff[rc] = x;
dqcoeff[rc] = (x * dequant_ptr[rc != 0]) / mul;
@@ -303,7 +290,7 @@ static void optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
final_eob++;
mb->plane[plane].eobs[block] = final_eob;
- *a = *l = (final_eob > 0);
+ return final_eob;
}
static INLINE void fdct32x32(int rd_transform,
@@ -393,7 +380,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
- optimize_b(plane, block, plane_bsize, tx_size, x, a, l);
+ const int ctx = combine_entropy_contexts(*a, *l);
+ *a = *l = optimize_b(x, plane, block, plane_bsize, tx_size, ctx) > 0;
} else {
*a = *l = p->eobs[block] > 0;
}
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index c51f43fa8..874767de7 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -376,15 +376,12 @@ static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
}
}
-static unsigned int zz_motion_search(const MACROBLOCK *x) {
- const MACROBLOCKD *const xd = &x->e_mbd;
- const uint8_t *const src = x->plane[0].src.buf;
- const int src_stride = x->plane[0].src.stride;
- const uint8_t *const ref = xd->plane[0].pre[0].buf;
- const int ref_stride = xd->plane[0].pre[0].stride;
+static unsigned int get_prediction_error(BLOCK_SIZE bsize,
+ const struct buf_2d *src,
+ const struct buf_2d *ref) {
unsigned int sse;
- vp9_variance_fn_t fn = get_block_variance_fn(xd->mi[0]->mbmi.sb_type);
- fn(src, src_stride, ref, ref_stride, &sse);
+ const vp9_variance_fn_t fn = get_block_variance_fn(bsize);
+ fn(src->buf, src->stride, ref->buf, ref->stride, &sse);
return sse;
}
@@ -416,9 +413,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
// Center the initial step/diamond search on best mv.
tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
step_param,
- x->sadperbit16, &num00, &v_fn_ptr,
- x->nmvjointcost,
- x->mvcost, ref_mv);
+ x->sadperbit16, &num00, &v_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
if (tmp_err < INT_MAX - new_mv_mode_penalty)
@@ -442,9 +437,7 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
} else {
tmp_err = cpi->diamond_search_sad(x, &ref_mv_full, &tmp_mv,
step_param + n, x->sadperbit16,
- &num00, &v_fn_ptr,
- x->nmvjointcost,
- x->mvcost, ref_mv);
+ &num00, &v_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
if (tmp_err < INT_MAX - new_mv_mode_penalty)
@@ -632,7 +625,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
int_mv mv, tmp_mv;
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
- motion_error = zz_motion_search(x);
+ motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &xd->plane[0].pre[0]);
// Assume 0,0 motion with no mv overhead.
mv.as_int = tmp_mv.as_int = 0;
@@ -668,7 +662,8 @@ void vp9_first_pass(VP9_COMP *cpi) {
int gf_motion_error;
xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
- gf_motion_error = zz_motion_search(x);
+ gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
+ &xd->plane[0].pre[0]);
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv.as_mv,
&gf_motion_error);
@@ -980,8 +975,6 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
oxcf->target_bandwidth / 10000000.0);
}
- cpi->output_framerate = oxcf->framerate;
-
// Calculate a minimum intra value to be used in determining the IIratio
// scores used in the second pass. We have this minimum to make sure
// that clips that are static but "low complexity" in the intra domain
@@ -2186,7 +2179,7 @@ void vp9_rc_get_first_pass_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
if (!cpi->refresh_alt_ref_frame &&
(cm->current_video_frame == 0 ||
- (cm->frame_flags & FRAMEFLAGS_KEY))) {
+ (cpi->frame_flags & FRAMEFLAGS_KEY))) {
cm->frame_type = KEY_FRAME;
} else {
cm->frame_type = INTER_FRAME;
@@ -2256,7 +2249,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
// Keyframe and section processing.
if (rc->frames_to_key == 0 ||
- (cm->frame_flags & FRAMEFLAGS_KEY)) {
+ (cpi->frame_flags & FRAMEFLAGS_KEY)) {
// Define next KF group and assign bits to it.
this_frame_copy = this_frame;
find_next_key_frame(cpi, &this_frame_copy);
diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c
index 44b171fd1..a9da7283a 100644
--- a/vp9/encoder/vp9_mbgraph.c
+++ b/vp9/encoder/vp9_mbgraph.c
@@ -77,12 +77,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi,
INT_MAX);
}
-static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv,
+static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
int_mv *dst_mv, int mb_row, int mb_col) {
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
unsigned int err, tmp_err;
- int_mv tmp_mv;
+ MV tmp_mv;
// Try zero MV first
// FIXME should really use something like near/nearest MV and/or MV prediction
@@ -93,24 +93,22 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const int_mv *ref_mv,
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search
- tmp_err = do_16x16_motion_iteration(cpi, &ref_mv->as_mv, &tmp_mv.as_mv,
- mb_row, mb_col);
+ tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col);
if (tmp_err < err) {
err = tmp_err;
- dst_mv->as_int = tmp_mv.as_int;
+ dst_mv->as_mv = tmp_mv;
}
// If the current best reference mv is not centered on 0,0 then do a 0,0
// based search as well.
- if (ref_mv->as_int) {
+ if (ref_mv->row != 0 || ref_mv->col != 0) {
unsigned int tmp_err;
- int_mv zero_ref_mv, tmp_mv;
+ MV zero_ref_mv = {0, 0}, tmp_mv;
- zero_ref_mv.as_int = 0;
- tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv.as_mv, &tmp_mv.as_mv,
+ tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv,
mb_row, mb_col);
if (tmp_err < err) {
- dst_mv->as_int = tmp_mv.as_int;
+ dst_mv->as_mv = tmp_mv;
err = tmp_err;
}
}
@@ -173,7 +171,7 @@ static void update_mbgraph_mb_stats
YV12_BUFFER_CONFIG *buf,
int mb_y_offset,
YV12_BUFFER_CONFIG *golden_ref,
- int_mv *prev_golden_ref_mv,
+ const MV *prev_golden_ref_mv,
YV12_BUFFER_CONFIG *alt_ref,
int mb_row,
int mb_col
@@ -239,13 +237,11 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
int mb_col, mb_row, offset = 0;
int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0;
- int_mv arf_top_mv, gld_top_mv;
+ MV arf_top_mv = {0, 0}, gld_top_mv = {0, 0};
MODE_INFO mi_local = { { 0 } };
// Set up limit values for motion vectors to prevent them extending outside
// the UMV borders.
- arf_top_mv.as_int = 0;
- gld_top_mv.as_int = 0;
x->mv_row_min = -BORDER_MV_PIXELS_B16;
x->mv_row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16;
xd->up_available = 0;
@@ -258,15 +254,13 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
mi_local.mbmi.ref_frame[1] = NONE;
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) {
- int_mv arf_left_mv, gld_left_mv;
+ MV arf_left_mv = arf_top_mv, gld_left_mv = gld_top_mv;
int mb_y_in_offset = mb_y_offset;
int arf_y_in_offset = arf_y_offset;
int gld_y_in_offset = gld_y_offset;
// Set up limit values for motion vectors to prevent them extending outside
// the UMV borders.
- arf_left_mv.as_int = arf_top_mv.as_int;
- gld_left_mv.as_int = gld_top_mv.as_int;
x->mv_col_min = -BORDER_MV_PIXELS_B16;
x->mv_col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16;
xd->left_available = 0;
@@ -277,11 +271,11 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi,
update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset,
golden_ref, &gld_left_mv, alt_ref,
mb_row, mb_col);
- arf_left_mv.as_int = mb_stats->ref[ALTREF_FRAME].m.mv.as_int;
- gld_left_mv.as_int = mb_stats->ref[GOLDEN_FRAME].m.mv.as_int;
+ arf_left_mv = mb_stats->ref[ALTREF_FRAME].m.mv.as_mv;
+ gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv;
if (mb_col == 0) {
- arf_top_mv.as_int = arf_left_mv.as_int;
- gld_top_mv.as_int = gld_left_mv.as_int;
+ arf_top_mv = arf_left_mv;
+ gld_top_mv = gld_left_mv;
}
xd->left_available = 1;
mb_y_in_offset += 16;
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index f7a02a4a7..8a7901172 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -66,7 +66,7 @@ int vp9_init_search_range(VP9_COMP *cpi, int size) {
}
static INLINE int mv_cost(const MV *mv,
- const int *joint_cost, int *comp_cost[2]) {
+ const int *joint_cost, int *const comp_cost[2]) {
return joint_cost[vp9_get_mv_joint(mv)] +
comp_cost[0][mv->row] + comp_cost[1][mv->col];
}
@@ -90,14 +90,13 @@ static int mv_err_cost(const MV *mv, const MV *ref,
return 0;
}
-static int mvsad_err_cost(const MV *mv, const MV *ref,
- const int *mvjsadcost, int *mvsadcost[2],
+static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
int error_per_bit) {
- if (mvsadcost) {
+ if (x->nmvsadcost) {
const MV diff = { mv->row - ref->row,
mv->col - ref->col };
- return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjsadcost, mvsadcost) *
- error_per_bit, 8);
+ return ROUND_POWER_OF_TWO(mv_cost(&diff, x->nmvjointsadcost,
+ x->nmvsadcost) * error_per_bit, 8);
}
return 0;
}
@@ -170,14 +169,13 @@ static INLINE int sp(int x) {
return (x & 7) << 1;
}
-static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c,
- int offset) {
- return &buf[(r >> 3) * stride + (c >> 3) - offset];
+static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
+ return &buf[(r >> 3) * stride + (c >> 3)];
}
/* returns subpixel variance error function */
#define DIST(r, c) \
- vfp->svf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), z, \
+ vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
src_stride, &sse)
/* checks if (r, c) has better score than previous best */
@@ -270,7 +268,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
int *mvjcost, int *mvcost[2],
int *distortion,
unsigned int *sse1) {
- const uint8_t *z = x->plane[0].src.buf;
+ const uint8_t *const z = x->plane[0].src.buf;
const int src_stride = x->plane[0].src.stride;
const MACROBLOCKD *xd = &x->e_mbd;
unsigned int besterr = INT_MAX;
@@ -283,7 +281,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
const int y_stride = xd->plane[0].pre[0].stride;
const int offset = bestmv->row * y_stride + bestmv->col;
- const uint8_t *y = xd->plane[0].pre[0].buf + offset;
+ const uint8_t *const y = xd->plane[0].pre[0].buf;
int rr = ref_mv->row;
int rc = ref_mv->col;
@@ -303,7 +301,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
bestmv->col *= 8;
// calculate central point error
- besterr = vfp->vf(y, y_stride, z, src_stride, sse1);
+ besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -353,7 +351,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
#undef DIST
/* returns subpixel variance error function */
#define DIST(r, c) \
- vfp->svaf(pre(y, y_stride, r, c, offset), y_stride, sp(c), sp(r), \
+ vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \
z, src_stride, &sse, second_pred)
int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
@@ -368,7 +366,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
unsigned int *sse1,
const uint8_t *second_pred,
int w, int h) {
- const uint8_t *z = x->plane[0].src.buf;
+ const uint8_t *const z = x->plane[0].src.buf;
const int src_stride = x->plane[0].src.stride;
const MACROBLOCKD *xd = &x->e_mbd;
unsigned int besterr = INT_MAX;
@@ -382,7 +380,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
const int y_stride = xd->plane[0].pre[0].stride;
const int offset = bestmv->row * y_stride + bestmv->col;
- const uint8_t *y = xd->plane[0].pre[0].buf + offset;
+ const uint8_t *const y = xd->plane[0].pre[0].buf;
int rr = ref_mv->row;
int rc = ref_mv->col;
@@ -404,7 +402,7 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
// calculate central point error
// TODO(yunqingwang): central pointer error was already calculated in full-
// pixel search, and can be passed in this function.
- vp9_comp_avg_pred(comp_pred, second_pred, w, h, y, y_stride);
+ vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -479,8 +477,7 @@ static INLINE int is_mv_in(const MACROBLOCK *x, const MV *mv) {
{\
if (thissad < bestsad) {\
if (use_mvcost) \
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv, \
- mvjsadcost, mvsadcost, sad_per_bit);\
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);\
if (thissad < bestsad) {\
bestsad = thissad;\
best_site = i;\
@@ -520,9 +517,6 @@ static int vp9_pattern_search(const MACROBLOCK *x,
int k = -1;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
int best_init_s = search_param_to_steps[search_param];
- const int *const mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
// adjust ref_mv to make sure it is within MV range
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
br = ref_mv->row;
@@ -531,8 +525,8 @@ static int vp9_pattern_search(const MACROBLOCK *x,
// Work out the start point for the search
bestsad = vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride,
- 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ 0x7fffffff) + mvsad_err_cost(x, ref_mv, &fcenter_mv,
+ sad_per_bit);
// Search all possible scales upto the search param around the center point
// pick the scale of the point that is best as the starting scale of
@@ -880,7 +874,6 @@ int vp9_fast_dia_search(const MACROBLOCK *x,
int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const uint8_t *what = x->plane[0].src.buf;
@@ -893,10 +886,6 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
unsigned int thissad;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
- const int *mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
int tr, tc;
int best_tr = 0;
int best_tc = 0;
@@ -918,8 +907,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
// Check the starting position
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
start_row = MAX(-range, x->mv_row_min - ref_row);
start_col = MAX(-range, x->mv_col_min - ref_col);
@@ -940,8 +928,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
if (sad_array[i] < bestsad) {
const MV this_mv = {ref_row + tr, ref_col + tc + i};
thissad = sad_array[i] +
- mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_tr = tr;
@@ -957,8 +944,7 @@ int vp9_full_range_search_c(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
if (thissad < bestsad) {
const MV this_mv = {ref_row + tr, ref_col + tc + i};
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
@@ -979,7 +965,6 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
@@ -991,23 +976,22 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
const search_site *const ss = &x->ss[search_param * x->searches_per_step];
const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- const int *mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
- const uint8_t *best_address;
+ const uint8_t *best_address, *in_what_ref;
int best_sad = INT_MAX;
int best_site = 0;
int last_site = 0;
int i, j, step;
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
- best_address = get_buf_from_mv(in_what, ref_mv);
+ in_what_ref = get_buf_from_mv(in_what, ref_mv);
+ best_address = in_what_ref;
*num00 = 0;
*best_mv = *ref_mv;
// Check the starting position
best_sad = fn_ptr->sdf(what->buf, what->stride,
- in_what->buf, in_what->stride, 0x7fffffff) +
- mvsad_err_cost(best_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
+ best_address, in_what->stride, 0x7fffffff) +
+ mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
i = 1;
@@ -1020,8 +1004,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
best_address + ss[i].offset, in_what->stride,
best_sad);
if (sad < best_sad) {
- sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
best_site = i;
@@ -1046,8 +1029,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
best_address + ss[best_site].offset,
in_what->stride, best_sad);
if (sad < best_sad) {
- sad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
best_mv->row += ss[best_site].mv.row;
@@ -1060,7 +1042,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x,
break;
};
#endif
- } else if (best_address == in_what->buf) {
+ } else if (best_address == in_what_ref) {
(*num00)++;
}
}
@@ -1071,7 +1053,6 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
MV *ref_mv, MV *best_mv, int search_param,
int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv) {
int i, j, step;
@@ -1098,10 +1079,6 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
const int tot_steps = (x->ss_count / x->searches_per_step) - search_param;
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
-
- const int *mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max);
ref_row = ref_mv->row;
ref_col = ref_mv->col;
@@ -1115,8 +1092,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
// Check the starting position
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
i = 1;
@@ -1149,9 +1125,8 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
if (sad_array[t] < bestsad) {
const MV this_mv = {best_mv->row + ss[i].mv.row,
best_mv->col + ss[i].mv.col};
- sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
-
+ sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv,
+ sad_per_bit);
if (sad_array[t] < bestsad) {
bestsad = sad_array[t];
best_site = i;
@@ -1171,9 +1146,7 @@ int vp9_diamond_search_sadx4(const MACROBLOCK *x,
in_what_stride, bestsad);
if (thissad < bestsad) {
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
-
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_site = i;
@@ -1231,8 +1204,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
int thissme, n, num00 = 0;
int bestsme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
step_param, sadpb, &n,
- fn_ptr, x->nmvjointcost,
- x->mvcost, ref_mv);
+ fn_ptr, ref_mv);
if (bestsme < INT_MAX)
bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
*dst_mv = temp_mv;
@@ -1250,8 +1222,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
} else {
thissme = cpi->diamond_search_sad(x, mvp_full, &temp_mv,
step_param + n, sadpb, &num00,
- fn_ptr, x->nmvjointcost, x->mvcost,
- ref_mv);
+ fn_ptr, ref_mv);
if (thissme < INT_MAX)
thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
@@ -1271,8 +1242,7 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
const int search_range = 8;
MV best_mv = *dst_mv;
thissme = cpi->refining_search_sad(x, &best_mv, sadpb, search_range,
- fn_ptr, x->nmvjointcost, x->mvcost,
- ref_mv);
+ fn_ptr, ref_mv);
if (thissme < INT_MAX)
thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
if (thissme < bestsme) {
@@ -1286,7 +1256,6 @@ int vp9_full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x,
int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv, MV *best_mv) {
int r, c;
const MACROBLOCKD *const xd = &x->e_mbd;
@@ -1296,12 +1265,10 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
const int row_max = MIN(ref_mv->row + distance, x->mv_row_max);
const int col_min = MAX(ref_mv->col - distance, x->mv_col_min);
const int col_max = MIN(ref_mv->col + distance, x->mv_col_max);
- const int *mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
int best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, sad_per_bit);
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
*best_mv = *ref_mv;
for (r = row_min; r < row_max; ++r) {
@@ -1309,9 +1276,7 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
const MV mv = {r, c};
const int sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride, best_sad) +
- mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
- sad_per_bit);
-
+ mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
@@ -1324,7 +1289,6 @@ int vp9_full_search_sad_c(const MACROBLOCK *x, const MV *ref_mv,
int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv, MV *best_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const uint8_t *const what = x->plane[0].src.buf;
@@ -1346,8 +1310,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
const int col_max = MIN(ref_col + distance, x->mv_col_max);
unsigned int sad_array[3];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- const int *mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
// Work out the mid point for the search
const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
@@ -1358,8 +1320,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
// Baseline value at the centre
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
for (r = row_min; r < row_max; r++) {
const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
@@ -1376,9 +1337,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
if (thissad < bestsad) {
this_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
-
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_mv->row = r;
@@ -1396,9 +1355,7 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
if (thissad < bestsad) {
this_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
-
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_mv->row = r;
@@ -1416,7 +1373,6 @@ int vp9_full_search_sadx3(const MACROBLOCK *x, const MV *ref_mv,
int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
int sad_per_bit, int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv, MV *best_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const uint8_t *const what = x->plane[0].src.buf;
@@ -1439,9 +1395,6 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
unsigned int sad_array[3];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- const int *mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
// Work out the mid point for the search
const uint8_t *bestaddress = &in_what[ref_row * in_what_stride + ref_col];
@@ -1451,8 +1404,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
// Baseline value at the center
bestsad = fn_ptr->sdf(what, what_stride,
bestaddress, in_what_stride, 0x7fffffff)
- + mvsad_err_cost(best_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
+ + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
for (r = row_min; r < row_max; r++) {
const uint8_t *check_here = &in_what[r * in_what_stride + col_min];
@@ -1469,9 +1421,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
if (thissad < bestsad) {
this_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
-
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_mv->row = r;
@@ -1494,9 +1444,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
if (thissad < bestsad) {
this_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
-
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_mv->row = r;
@@ -1515,9 +1463,7 @@ int vp9_full_search_sadx8(const MACROBLOCK *x, const MV *ref_mv,
if (thissad < bestsad) {
this_mv.col = c;
- thissad += mvsad_err_cost(&this_mv, &fcenter_mv,
- mvjsadcost, mvsadcost, sad_per_bit);
-
+ thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (thissad < bestsad) {
bestsad = thissad;
best_mv->row = r;
@@ -1536,20 +1482,16 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
int search_range,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv) {
const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
const MACROBLOCKD *const xd = &x->e_mbd;
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- const int *mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
-
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv),
in_what->stride, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
int i, j;
for (i = 0; i < search_range; i++) {
@@ -1562,8 +1504,7 @@ int vp9_refining_search_sad_c(const MACROBLOCK *x,
unsigned int sad = fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride, best_sad);
if (sad < best_sad) {
- sad += mvsad_err_cost(&mv, &fcenter_mv, mvjsadcost, mvsadcost,
- error_per_bit);
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
best_sad = sad;
best_site = j;
@@ -1586,19 +1527,16 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
int search_range,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const MV neighbors[4] = {{ -1, 0}, {0, -1}, {0, 1}, {1, 0}};
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- const int *mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address,
in_what->stride, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
int i, j;
for (i = 0; i < search_range; i++) {
@@ -1623,9 +1561,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
if (sads[j] < best_sad) {
const MV mv = {ref_mv->row + neighbors[j].row,
ref_mv->col + neighbors[j].col};
- sads[j] += mvsad_err_cost(&mv, &fcenter_mv,
- mvjsadcost, mvsadcost, error_per_bit);
-
+ sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sads[j] < best_sad) {
best_sad = sads[j];
best_site = j;
@@ -1642,9 +1578,7 @@ int vp9_refining_search_sadx4(const MACROBLOCK *x,
get_buf_from_mv(in_what, &mv),
in_what->stride, best_sad);
if (sad < best_sad) {
- sad += mvsad_err_cost(&mv, &fcenter_mv,
- mvjsadcost, mvsadcost, error_per_bit);
-
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
best_sad = sad;
best_site = j;
@@ -1672,7 +1606,6 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
int search_range,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv,
const uint8_t *second_pred, int w, int h) {
const MV neighbors[8] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0},
@@ -1681,12 +1614,10 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
const struct buf_2d *const what = &x->plane[0].src;
const struct buf_2d *const in_what = &xd->plane[0].pre[0];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
- const int *mvjsadcost = x->nmvjointsadcost;
- int *mvsadcost[2] = {x->nmvsadcost[0], x->nmvsadcost[1]};
unsigned int best_sad = fn_ptr->sdaf(what->buf, what->stride,
get_buf_from_mv(in_what, ref_mv), in_what->stride,
second_pred, 0x7fffffff) +
- mvsad_err_cost(ref_mv, &fcenter_mv, mvjsadcost, mvsadcost, error_per_bit);
+ mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
int i, j;
for (i = 0; i < search_range; ++i) {
@@ -1701,8 +1632,7 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x,
get_buf_from_mv(in_what, &mv), in_what->stride,
second_pred, best_sad);
if (sad < best_sad) {
- sad += mvsad_err_cost(&mv, &fcenter_mv,
- mvjsadcost, mvsadcost, error_per_bit);
+ sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
best_sad = sad;
best_site = j;
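
Note on the vp9_mcomp.c hunks above: the joint/component SAD cost tables are no longer threaded through as mvjsadcost/mvsadcost arguments; the helper now receives the MACROBLOCK and reads them itself. The body of mvsad_err_cost is outside these hunks, but a minimal sketch consistent with the removed locals (x->nmvjointsadcost, x->nmvsadcost) could look like the following; vp9_get_mv_joint and the exact scaling shift are assumptions, not taken from the patch.

    static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
                              int sad_per_bit) {
      // Difference between the candidate and the (3-bit shifted) center mv.
      const MV diff = { mv->row - ref->row, mv->col - ref->col };
      const int joint = x->nmvjointsadcost[vp9_get_mv_joint(&diff)];
      const int comp = x->nmvsadcost[0][diff.row] + x->nmvsadcost[1][diff.col];
      // Scale the bit cost by sad_per_bit; the real rounding/shift may differ.
      return ((joint + comp) * sad_per_bit) >> 8;
    }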
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index f7b7c5e49..70d7985e4 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -110,14 +110,12 @@ typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
const MV *ref_mv, int sad_per_bit,
int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv, MV *best_mv);
typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x,
MV *ref_mv, int sad_per_bit,
int distance,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv);
typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
@@ -125,14 +123,12 @@ typedef int (*vp9_diamond_search_fn_t)(const MACROBLOCK *x,
int search_param, int sad_per_bit,
int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv);
int vp9_refining_search_8p_c(const MACROBLOCK *x,
MV *ref_mv, int error_per_bit,
int search_range,
const vp9_variance_fn_ptr_t *fn_ptr,
- int *mvjcost, int *mvcost[2],
const MV *center_mv, const uint8_t *second_pred,
int w, int h);
#ifdef __cplusplus
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 0ac9d5f05..03f3c87a2 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -61,11 +61,6 @@ void vp9_coef_tree_initialize();
// now so that HIGH_PRECISION is always
// chosen.
-// Max rate target for 1080P and below encodes under normal circumstances
-// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
-#define MAX_MB_RATE 250
-#define MAXRATE_1080P 2025000
-
// #define OUTPUT_YUV_REC
#ifdef OUTPUT_YUV_SRC
@@ -386,117 +381,119 @@ static int is_slowest_mode(int mode) {
static void set_rd_speed_thresholds(VP9_COMP *cpi) {
int i;
+ RD_OPT *const rd = &cpi->rd;
// Set baseline threshold values
for (i = 0; i < MAX_MODES; ++i)
- cpi->rd_thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
-
- cpi->rd_thresh_mult[THR_NEARESTMV] = 0;
- cpi->rd_thresh_mult[THR_NEARESTG] = 0;
- cpi->rd_thresh_mult[THR_NEARESTA] = 0;
-
- cpi->rd_thresh_mult[THR_DC] += 1000;
-
- cpi->rd_thresh_mult[THR_NEWMV] += 1000;
- cpi->rd_thresh_mult[THR_NEWA] += 1000;
- cpi->rd_thresh_mult[THR_NEWG] += 1000;
-
- cpi->rd_thresh_mult[THR_NEARMV] += 1000;
- cpi->rd_thresh_mult[THR_NEARA] += 1000;
- cpi->rd_thresh_mult[THR_COMP_NEARESTLA] += 1000;
- cpi->rd_thresh_mult[THR_COMP_NEARESTGA] += 1000;
-
- cpi->rd_thresh_mult[THR_TM] += 1000;
-
- cpi->rd_thresh_mult[THR_COMP_NEARLA] += 1500;
- cpi->rd_thresh_mult[THR_COMP_NEWLA] += 2000;
- cpi->rd_thresh_mult[THR_NEARG] += 1000;
- cpi->rd_thresh_mult[THR_COMP_NEARGA] += 1500;
- cpi->rd_thresh_mult[THR_COMP_NEWGA] += 2000;
-
- cpi->rd_thresh_mult[THR_ZEROMV] += 2000;
- cpi->rd_thresh_mult[THR_ZEROG] += 2000;
- cpi->rd_thresh_mult[THR_ZEROA] += 2000;
- cpi->rd_thresh_mult[THR_COMP_ZEROLA] += 2500;
- cpi->rd_thresh_mult[THR_COMP_ZEROGA] += 2500;
-
- cpi->rd_thresh_mult[THR_H_PRED] += 2000;
- cpi->rd_thresh_mult[THR_V_PRED] += 2000;
- cpi->rd_thresh_mult[THR_D45_PRED ] += 2500;
- cpi->rd_thresh_mult[THR_D135_PRED] += 2500;
- cpi->rd_thresh_mult[THR_D117_PRED] += 2500;
- cpi->rd_thresh_mult[THR_D153_PRED] += 2500;
- cpi->rd_thresh_mult[THR_D207_PRED] += 2500;
- cpi->rd_thresh_mult[THR_D63_PRED] += 2500;
+ rd->thresh_mult[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
+
+ rd->thresh_mult[THR_NEARESTMV] = 0;
+ rd->thresh_mult[THR_NEARESTG] = 0;
+ rd->thresh_mult[THR_NEARESTA] = 0;
+
+ rd->thresh_mult[THR_DC] += 1000;
+
+ rd->thresh_mult[THR_NEWMV] += 1000;
+ rd->thresh_mult[THR_NEWA] += 1000;
+ rd->thresh_mult[THR_NEWG] += 1000;
+
+ rd->thresh_mult[THR_NEARMV] += 1000;
+ rd->thresh_mult[THR_NEARA] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
+ rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
+
+ rd->thresh_mult[THR_TM] += 1000;
+
+ rd->thresh_mult[THR_COMP_NEARLA] += 1500;
+ rd->thresh_mult[THR_COMP_NEWLA] += 2000;
+ rd->thresh_mult[THR_NEARG] += 1000;
+ rd->thresh_mult[THR_COMP_NEARGA] += 1500;
+ rd->thresh_mult[THR_COMP_NEWGA] += 2000;
+
+ rd->thresh_mult[THR_ZEROMV] += 2000;
+ rd->thresh_mult[THR_ZEROG] += 2000;
+ rd->thresh_mult[THR_ZEROA] += 2000;
+ rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
+ rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
+
+ rd->thresh_mult[THR_H_PRED] += 2000;
+ rd->thresh_mult[THR_V_PRED] += 2000;
+ rd->thresh_mult[THR_D45_PRED ] += 2500;
+ rd->thresh_mult[THR_D135_PRED] += 2500;
+ rd->thresh_mult[THR_D117_PRED] += 2500;
+ rd->thresh_mult[THR_D153_PRED] += 2500;
+ rd->thresh_mult[THR_D207_PRED] += 2500;
+ rd->thresh_mult[THR_D63_PRED] += 2500;
/* disable frame modes if flags not set */
if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
- cpi->rd_thresh_mult[THR_NEWMV ] = INT_MAX;
- cpi->rd_thresh_mult[THR_NEARESTMV] = INT_MAX;
- cpi->rd_thresh_mult[THR_ZEROMV ] = INT_MAX;
- cpi->rd_thresh_mult[THR_NEARMV ] = INT_MAX;
+ rd->thresh_mult[THR_NEWMV ] = INT_MAX;
+ rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
+ rd->thresh_mult[THR_ZEROMV ] = INT_MAX;
+ rd->thresh_mult[THR_NEARMV ] = INT_MAX;
}
if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
- cpi->rd_thresh_mult[THR_NEARESTG ] = INT_MAX;
- cpi->rd_thresh_mult[THR_ZEROG ] = INT_MAX;
- cpi->rd_thresh_mult[THR_NEARG ] = INT_MAX;
- cpi->rd_thresh_mult[THR_NEWG ] = INT_MAX;
+ rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
+ rd->thresh_mult[THR_ZEROG ] = INT_MAX;
+ rd->thresh_mult[THR_NEARG ] = INT_MAX;
+ rd->thresh_mult[THR_NEWG ] = INT_MAX;
}
if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
- cpi->rd_thresh_mult[THR_NEARESTA ] = INT_MAX;
- cpi->rd_thresh_mult[THR_ZEROA ] = INT_MAX;
- cpi->rd_thresh_mult[THR_NEARA ] = INT_MAX;
- cpi->rd_thresh_mult[THR_NEWA ] = INT_MAX;
+ rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
+ rd->thresh_mult[THR_ZEROA ] = INT_MAX;
+ rd->thresh_mult[THR_NEARA ] = INT_MAX;
+ rd->thresh_mult[THR_NEWA ] = INT_MAX;
}
if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
(VP9_LAST_FLAG | VP9_ALT_FLAG)) {
- cpi->rd_thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
- cpi->rd_thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
- cpi->rd_thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
- cpi->rd_thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
}
if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
(VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
- cpi->rd_thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
- cpi->rd_thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
- cpi->rd_thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
- cpi->rd_thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
+ rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
}
}
static void set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
const SPEED_FEATURES *const sf = &cpi->sf;
+ RD_OPT *const rd = &cpi->rd;
int i;
for (i = 0; i < MAX_REFS; ++i)
- cpi->rd_thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
+ rd->thresh_mult_sub8x8[i] = is_slowest_mode(cpi->oxcf.mode) ? -500 : 0;
- cpi->rd_thresh_mult_sub8x8[THR_LAST] += 2500;
- cpi->rd_thresh_mult_sub8x8[THR_GOLD] += 2500;
- cpi->rd_thresh_mult_sub8x8[THR_ALTR] += 2500;
- cpi->rd_thresh_mult_sub8x8[THR_INTRA] += 2500;
- cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] += 4500;
- cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] += 4500;
+ rd->thresh_mult_sub8x8[THR_LAST] += 2500;
+ rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
+ rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
+ rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
+ rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
+ rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
// Check for masked out split cases.
for (i = 0; i < MAX_REFS; i++)
if (sf->disable_split_mask & (1 << i))
- cpi->rd_thresh_mult_sub8x8[i] = INT_MAX;
+ rd->thresh_mult_sub8x8[i] = INT_MAX;
// disable mode test if frame flag is not set
if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
- cpi->rd_thresh_mult_sub8x8[THR_LAST] = INT_MAX;
+ rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
- cpi->rd_thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
+ rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
- cpi->rd_thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
+ rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
(VP9_LAST_FLAG | VP9_ALT_FLAG))
- cpi->rd_thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
+ rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
(VP9_GOLD_FLAG | VP9_ALT_FLAG))
- cpi->rd_thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
+ rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;
}
static void set_speed_features(VP9_COMP *cpi) {
@@ -627,50 +624,8 @@ static void update_frame_size(VP9_COMP *cpi) {
}
void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
- VP9_COMMON *const cm = &cpi->common;
- RATE_CONTROL *const rc = &cpi->rc;
- VP9_CONFIG *const oxcf = &cpi->oxcf;
- int vbr_max_bits;
-
- oxcf->framerate = framerate < 0.1 ? 30 : framerate;
- cpi->output_framerate = cpi->oxcf.framerate;
- rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth /
- cpi->output_framerate);
- rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth *
- oxcf->two_pass_vbrmin_section / 100);
-
- rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
-
- // A maximum bitrate for a frame is defined.
- // The baseline for this aligns with HW implementations that
- // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
- // per 16x16 MB (averaged over a frame). However this limit is extended if
- // a very high rate is given on the command line or the the rate cannnot
- // be acheived because of a user specificed max q (e.g. when the user
- // specifies lossless encode.
- //
- vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth *
- oxcf->two_pass_vbrmax_section) / 100);
- rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
- vbr_max_bits);
-
- // Set Maximum gf/arf interval
- rc->max_gf_interval = 16;
-
- // Extended interval for genuinely static scenes
- rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
-
- // Special conditions when alt ref frame enabled in lagged compress mode
- if (oxcf->play_alternate && oxcf->lag_in_frames) {
- if (rc->max_gf_interval > oxcf->lag_in_frames - 1)
- rc->max_gf_interval = oxcf->lag_in_frames - 1;
-
- if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
- rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
- }
-
- if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
- rc->max_gf_interval = rc->static_scene_max_gf_interval;
+ cpi->oxcf.framerate = framerate < 0.1 ? 30 : framerate;
+ vp9_rc_update_framerate(cpi);
}
int64_t vp9_rescale(int64_t val, int64_t num, int denom) {
@@ -860,10 +815,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
cm->display_width = cpi->oxcf.width;
cm->display_height = cpi->oxcf.height;
- // VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
- cpi->oxcf.sharpness = MIN(7, cpi->oxcf.sharpness);
-
- cpi->common.lf.sharpness_level = cpi->oxcf.sharpness;
+ cm->lf.sharpness_level = cpi->oxcf.sharpness;
if (cpi->initial_width) {
// Increasing the size of the frame beyond the first seen frame, or some
@@ -883,10 +835,6 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
cpi->speed = abs(cpi->oxcf.cpu_used);
- // Limit on lag buffers as these are not currently dynamically allocated.
- if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
- cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
-
#if CONFIG_MULTIPLE_ARF
vp9_zero(cpi->alt_ref_source);
#else
@@ -906,7 +854,9 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9_CONFIG *oxcf) {
cpi->ext_refresh_frame_context_pending = 0;
}
+#ifndef M_LOG2_E
#define M_LOG2_E 0.693147180559945309417
+#endif
#define log2f(x) (log (x) / (float) M_LOG2_E)
static void cal_nmvjointsadcost(int *mvjointsadcost) {
@@ -1275,9 +1225,9 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
// Default rd threshold factors for mode selection
for (i = 0; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MAX_MODES; ++j)
- cpi->rd_thresh_freq_fact[i][j] = 32;
+ cpi->rd.thresh_freq_fact[i][j] = 32;
for (j = 0; j < MAX_REFS; ++j)
- cpi->rd_thresh_freq_sub8x8[i][j] = 32;
+ cpi->rd.thresh_freq_sub8x8[i][j] = 32;
}
#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SVFHH, SVFHV, SVFHHV, \
@@ -1386,8 +1336,6 @@ VP9_COMP *vp9_create_compressor(VP9_CONFIG *oxcf) {
cm->error.setjmp = 0;
- vp9_zero(cpi->common.counts.uv_mode);
-
#ifdef MODE_TEST_HIT_STATS
vp9_zero(cpi->mode_test_hits);
#endif
@@ -1787,7 +1735,9 @@ static void scale_and_extend_frame_nonnormative(YV12_BUFFER_CONFIG *src_fb,
dsts[i], out_h_uv, out_w_uv, dst_strides[i]);
}
}
- vp8_yv12_extend_frame_borders(dst_fb);
+ // TODO(hkuang): Call C version explicitly
+ // as the neon version only extends the border by 32 pixels.
+ vp8_yv12_extend_frame_borders_c(dst_fb);
}
static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
@@ -1828,7 +1778,9 @@ static void scale_and_extend_frame(YV12_BUFFER_CONFIG *src_fb,
}
}
- vp8_yv12_extend_frame_borders(dst_fb);
+ // TODO(hkuang): Call C version explicitly
+ // as the neon version only extends the border by 32 pixels.
+ vp8_yv12_extend_frame_borders_c(dst_fb);
}
static int find_fp_qindex() {
@@ -2538,7 +2490,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_clear_system_state();
- vp9_zero(cpi->rd_tx_select_threshes);
+ vp9_zero(cpi->rd.tx_select_threshes);
#if CONFIG_VP9_POSTPROC
if (cpi->oxcf.noise_sensitivity > 0) {
@@ -2647,22 +2599,23 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
output_frame_level_debug_stats(cpi);
#endif
if (cpi->refresh_golden_frame == 1)
- cm->frame_flags |= FRAMEFLAGS_GOLDEN;
+ cpi->frame_flags |= FRAMEFLAGS_GOLDEN;
else
- cm->frame_flags &= ~FRAMEFLAGS_GOLDEN;
+ cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN;
if (cpi->refresh_alt_ref_frame == 1)
- cm->frame_flags |= FRAMEFLAGS_ALTREF;
+ cpi->frame_flags |= FRAMEFLAGS_ALTREF;
else
- cm->frame_flags &= ~FRAMEFLAGS_ALTREF;
+ cpi->frame_flags &= ~FRAMEFLAGS_ALTREF;
get_ref_frame_flags(cpi);
+ cm->last_frame_type = cm->frame_type;
vp9_rc_postencode_update(cpi, *size);
if (cm->frame_type == KEY_FRAME) {
// Tell the caller that the frame was coded as a key frame
- *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY;
+ *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY;
#if CONFIG_MULTIPLE_ARF
// Reset the sequence number.
@@ -2673,7 +2626,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
#endif
} else {
- *frame_flags = cm->frame_flags&~FRAMEFLAGS_KEY;
+ *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY;
#if CONFIG_MULTIPLE_ARF
/* Increment position in the coded frame sequence. */
@@ -3042,7 +2995,16 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
#endif
- cm->frame_flags = *frame_flags;
+ cpi->frame_flags = *frame_flags;
+
+ if (cpi->pass == 2 &&
+ cm->current_video_frame == 0 &&
+ cpi->oxcf.allow_spatial_resampling &&
+ cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) {
+ // Internal scaling is triggered on the first frame.
+ vp9_set_size_literal(cpi, cpi->oxcf.scaled_frame_width,
+ cpi->oxcf.scaled_frame_height);
+ }
// Reset the frame pointers to the current frame size
vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h
index 18203f96a..7a110ac4c 100644
--- a/vp9/encoder/vp9_onyx_int.h
+++ b/vp9/encoder/vp9_onyx_int.h
@@ -185,6 +185,7 @@ typedef enum {
AQ_MODE_COUNT // This should always be the last member of the enum
} AQ_MODE;
+
typedef struct VP9_CONFIG {
BITSTREAM_PROFILE profile;
BIT_DEPTH bit_depth;
@@ -231,6 +232,11 @@ typedef struct VP9_CONFIG {
int lossless;
AQ_MODE aq_mode; // Adaptive Quantization mode
+ // Internal frame size scaling.
+ int allow_spatial_resampling;
+ int scaled_frame_width;
+ int scaled_frame_height;
+
// Enable feature to reduce the frame quantization every x frames.
int frame_periodic_boost;
@@ -281,6 +287,35 @@ typedef struct VP9_CONFIG {
vp8e_tuning tuning;
} VP9_CONFIG;
+
+typedef struct RD_OPT {
+ // Thresh_mult is used to set a threshold for the rd score. A higher value
+ // means that we will accept the best mode so far more often. This number
+ // is used in combination with the current block size, and thresh_freq_fact
+ // to pick a threshold.
+ int thresh_mult[MAX_MODES];
+ int thresh_mult_sub8x8[MAX_REFS];
+
+ int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
+ int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
+ int thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS];
+ int thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS];
+
+ int64_t comp_pred_diff[REFERENCE_MODES];
+ int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
+ int64_t tx_select_diff[TX_MODES];
+ // FIXME(rbultje) can this overflow?
+ int tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
+
+ int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
+ int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+ int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
+ int64_t mask_filter;
+
+ int RDMULT;
+ int RDDIV;
+} RD_OPT;
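
The new RD_OPT block simply re-homes the fields deleted from VP9_COMP further down, so call sites gain one level of indirection. Illustrative accesses, mirroring the replacements made elsewhere in this patch:

    RD_OPT *const rd = &cpi->rd;
    rd->thresh_mult[THR_NEWMV] += 1000;      /* was cpi->rd_thresh_mult[...] */
    rd->filter_cache[i] = INT64_MAX;         /* was cpi->rd_filter_cache[i]  */
    x->errorperbit = cpi->rd.RDMULT / RD_MULT_EPB_RATIO;  /* was cpi->RDMULT */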
+
typedef struct VP9_COMP {
QUANTS quants;
MACROBLOCK mb;
@@ -343,31 +378,7 @@ typedef struct VP9_COMP {
// Ambient reconstruction err target for force key frames
int ambient_err;
- // Thresh_mult is used to set a threshold for the rd score. A higher value
- // means that we will accept the best mode so far more often. This number
- // is used in combination with the current block size, and thresh_freq_fact
- // to pick a threshold.
- int rd_thresh_mult[MAX_MODES];
- int rd_thresh_mult_sub8x8[MAX_REFS];
-
- int rd_threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];
- int rd_thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
- int rd_thresh_sub8x8[MAX_SEGMENTS][BLOCK_SIZES][MAX_REFS];
- int rd_thresh_freq_sub8x8[BLOCK_SIZES][MAX_REFS];
-
- int64_t rd_comp_pred_diff[REFERENCE_MODES];
- int64_t rd_prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
- int64_t rd_tx_select_diff[TX_MODES];
- // FIXME(rbultje) can this overflow?
- int rd_tx_select_threshes[MAX_REF_FRAMES][TX_MODES];
-
- int64_t rd_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
- int64_t rd_filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
- int64_t rd_filter_cache[SWITCHABLE_FILTER_CONTEXTS];
- int64_t mask_filter_rd;
-
- int RDMULT;
- int RDDIV;
+ RD_OPT rd;
CODING_CONTEXT coding_context;
@@ -376,7 +387,6 @@ typedef struct VP9_COMP {
int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
int active_arnr_strength; // <= cpi->oxcf.arnr_max_strength
- double output_framerate;
int64_t last_time_stamp_seen;
int64_t last_end_time_stamp_seen;
int64_t first_time_stamp_ever;
@@ -498,6 +508,8 @@ typedef struct VP9_COMP {
int use_large_partition_rate;
+ int frame_flags;
+
#if CONFIG_MULTIPLE_ARF
// ARF tracking variables.
int multi_arf_enabled;
@@ -591,7 +603,7 @@ static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
-static INLINE int vp9_frame_is_boosted(const VP9_COMP *cpi) {
+static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref);
}
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 3ac85228b..e003a0f42 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -23,7 +23,7 @@
#include "vp9/encoder/vp9_picklpf.h"
#include "vp9/encoder/vp9_quantize.h"
-static int get_max_filter_level(VP9_COMP *cpi) {
+static int get_max_filter_level(const VP9_COMP *cpi) {
return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
: MAX_LOOP_FILTER;
}
@@ -43,15 +43,15 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
return filt_err;
}
-static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
- int partial_frame) {
- VP9_COMMON *const cm = &cpi->common;
- struct loopfilter *const lf = &cm->lf;
+static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
+ int partial_frame) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const struct loopfilter *const lf = &cm->lf;
const int min_filter_level = 0;
const int max_filter_level = get_max_filter_level(cpi);
- int best_err;
- int filt_best;
int filt_direction = 0;
+ int best_err, filt_best;
+
// Start the search at the previous frame filter level unless it is now out of
// range.
int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
@@ -128,7 +128,7 @@ static void search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
}
}
- lf->filter_level = filt_best;
+ return filt_best;
}
void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
@@ -150,6 +150,7 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
filt_guess -= 4;
lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
} else {
- search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE);
+ lf->filter_level = search_filter_level(sd, cpi,
+ method == LPF_PICK_FROM_SUBIMAGE);
}
}
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index f3fe99cdb..3d398edc9 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -233,8 +233,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const int64_t intra_mode_cost = 50;
unsigned char segment_id = mbmi->segment_id;
- const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
- const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
+ const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
+ const int *const rd_thresh_freq_fact = cpi->rd.thresh_freq_fact[bsize];
// Mode index conversion form THR_MODES to MB_PREDICTION_MODE for a ref frame.
int mode_idx[MB_MODE_COUNT] = {0};
INTERP_FILTER filter_ref = SWITCHABLE;
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 342081644..76ec84b5f 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -27,6 +27,11 @@
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ratectrl.h"
+// Max rate target for 1080P and below encodes under normal circumstances
+// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
+#define MAX_MB_RATE 250
+#define MAXRATE_1080P 2025000
+
#define DEFAULT_KF_BOOST 2000
#define DEFAULT_GF_BOOST 2000
@@ -74,14 +79,13 @@ void vp9_rc_init_minq_luts() {
for (i = 0; i < QINDEX_RANGE; i++) {
const double maxq = vp9_convert_qindex_to_q(i);
-
kf_low_motion_minq[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.15);
kf_high_motion_minq[i] = get_minq_index(maxq, 0.000002, -0.0012, 0.50);
gf_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.32);
gf_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.50);
afq_low_motion_minq[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.33);
afq_high_motion_minq[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55);
- inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.75);
+ inter_minq[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.55);
}
}
@@ -367,8 +371,8 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
// Calculate required scaling factor based on target frame size and size of
// frame produced using previous Q.
- target_bits_per_mb =
- ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
+ target_bits_per_mb =
+ ((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs;
i = active_best_quality;
@@ -565,11 +569,18 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
// Limit Q range for the adaptive loop.
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
- if (!(cm->current_video_frame == 0))
- *top_index = (active_worst_quality + active_best_quality * 3) / 4;
+ if (cm->frame_type == KEY_FRAME &&
+ !rc->this_key_frame_forced &&
+ !(cm->current_video_frame == 0)) {
+ int qdelta = 0;
+ vp9_clear_system_state();
+ qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+ active_worst_quality, 2.0);
+ *top_index = active_worst_quality + qdelta;
+ *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
}
#endif
+
// Special case code to try and match quality with forced key frames
if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
q = rc->last_boosted_qindex;
@@ -725,15 +736,26 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
*bottom_index = active_best_quality;
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
- // Limit Q range for the adaptive loop.
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
- if (!(cm->current_video_frame == 0))
- *top_index = (active_worst_quality + active_best_quality * 3) / 4;
- } else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
- *top_index = (active_worst_quality + active_best_quality) / 2;
+ {
+ int qdelta = 0;
+ vp9_clear_system_state();
+
+ // Limit Q range for the adaptive loop.
+ if (cm->frame_type == KEY_FRAME &&
+ !rc->this_key_frame_forced &&
+ !(cm->current_video_frame == 0)) {
+ qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+ active_worst_quality, 2.0);
+ } else if (!rc->is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+ active_worst_quality, 1.75);
+ }
+ *top_index = active_worst_quality + qdelta;
+ *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
}
#endif
+
if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) {
q = active_best_quality;
// Special case code to try and match quality with forced key frames
@@ -907,13 +929,22 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
*bottom_index = active_best_quality;
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
- // Limit Q range for the adaptive loop.
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
- *top_index = (active_worst_quality + active_best_quality * 3) / 4;
- } else if (!rc->is_src_frame_alt_ref &&
- (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
- *top_index = (active_worst_quality + active_best_quality) / 2;
+ {
+ int qdelta = 0;
+ vp9_clear_system_state();
+
+ // Limit Q range for the adaptive loop.
+ if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced) {
+ qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+ active_worst_quality, 2.0);
+ } else if (!rc->is_src_frame_alt_ref &&
+ (oxcf->end_usage != USAGE_STREAM_FROM_SERVER) &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
+ active_worst_quality, 1.75);
+ }
+ *top_index = active_worst_quality + qdelta;
+ *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
}
#endif
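
All three q-range limiters rewritten above (one-pass CBR, one-pass VBR, two-pass) now share the same shape: derive a rate-based delta from the active worst quality and keep top_index from dropping below bottom_index. A condensed illustration of that clamp follows; is_key_adjustment is a hypothetical flag standing in for the frame-type checks in the hunks, and the 2.0 / 1.75 ratios are the ones used above.

    int qdelta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type,
                                            active_worst_quality,
                                            is_key_adjustment ? 2.0 : 1.75);
    // The ratios are > 1.0, so qdelta is normally negative (a lower max q).
    *top_index = active_worst_quality + qdelta;
    if (*top_index < *bottom_index)
      *top_index = *bottom_index;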
@@ -1065,11 +1096,11 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
}
void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
- VP9_COMMON *const cm = &cpi->common;
+ const VP9_COMMON *const cm = &cpi->common;
const VP9_CONFIG *const oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
+ const int qindex = cm->base_qindex;
- cm->last_frame_type = cm->frame_type;
// Update rate control heuristics
rc->projected_frame_size = (int)(bytes_used << 3);
@@ -1080,25 +1111,24 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
// Keep a record of last Q and ambient average Q.
if (cm->frame_type == KEY_FRAME) {
- rc->last_q[KEY_FRAME] = cm->base_qindex;
- rc->avg_frame_qindex[KEY_FRAME] = ROUND_POWER_OF_TWO(
- 3 * rc->avg_frame_qindex[KEY_FRAME] + cm->base_qindex, 2);
+ rc->last_q[KEY_FRAME] = qindex;
+ rc->avg_frame_qindex[KEY_FRAME] =
+ ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
} else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
- !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) {
- rc->last_q[2] = cm->base_qindex;
- rc->avg_frame_qindex[2] = ROUND_POWER_OF_TWO(
- 3 * rc->avg_frame_qindex[2] + cm->base_qindex, 2);
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) &&
+ !(cpi->use_svc && oxcf->end_usage == USAGE_STREAM_FROM_SERVER)) {
+ rc->last_q[2] = qindex;
+ rc->avg_frame_qindex[2] =
+ ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[2] + qindex, 2);
} else {
- rc->last_q[INTER_FRAME] = cm->base_qindex;
- rc->avg_frame_qindex[INTER_FRAME] = ROUND_POWER_OF_TWO(
- 3 * rc->avg_frame_qindex[INTER_FRAME] + cm->base_qindex, 2);
+ rc->last_q[INTER_FRAME] = qindex;
+ rc->avg_frame_qindex[INTER_FRAME] =
+ ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
rc->ni_frames++;
- rc->tot_q += vp9_convert_qindex_to_q(cm->base_qindex);
- rc->avg_q = rc->tot_q / (double)rc->ni_frames;
-
+ rc->tot_q += vp9_convert_qindex_to_q(qindex);
+ rc->avg_q = rc->tot_q / rc->ni_frames;
// Calculate the average Q for normal inter frames (not key or GFU frames).
- rc->ni_tot_qi += cm->base_qindex;
+ rc->ni_tot_qi += qindex;
rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
}
@@ -1107,11 +1137,11 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
// If all mbs in this group are skipped only update if the Q value is
// better than that already stored.
// This is used to help set quality in forced key frames to reduce popping
- if ((cm->base_qindex < rc->last_boosted_qindex) ||
+ if ((qindex < rc->last_boosted_qindex) ||
((cpi->static_mb_pct < 100) &&
((cm->frame_type == KEY_FRAME) || cpi->refresh_alt_ref_frame ||
(cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
- rc->last_boosted_qindex = cm->base_qindex;
+ rc->last_boosted_qindex = qindex;
}
update_buffer_level(cpi, rc->projected_frame_size);
@@ -1196,7 +1226,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
int target;
if (!cpi->refresh_alt_ref_frame &&
(cm->current_video_frame == 0 ||
- (cm->frame_flags & FRAMEFLAGS_KEY) ||
+ (cpi->frame_flags & FRAMEFLAGS_KEY) ||
rc->frames_to_key == 0 ||
(cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
cm->frame_type = KEY_FRAME;
@@ -1258,17 +1288,25 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
const RATE_CONTROL *rc = &cpi->rc;
+ const VP9_CONFIG *oxcf = &cpi->oxcf;
+ const SVC *const svc = &cpi->svc;
int target;
-
if (cpi->common.current_video_frame == 0) {
target = ((cpi->oxcf.starting_buffer_level / 2) > INT_MAX)
? INT_MAX : (int)(cpi->oxcf.starting_buffer_level / 2);
} else {
- const int initial_boost = 32;
- int kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
- if (rc->frames_since_key < cpi->output_framerate / 2) {
+ int kf_boost = 32;
+ double framerate = oxcf->framerate;
+ if (svc->number_temporal_layers > 1 &&
+ oxcf->end_usage == USAGE_STREAM_FROM_SERVER) {
+ // Use the layer framerate for temporal-layer CBR mode.
+ const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id];
+ framerate = lc->framerate;
+ }
+ kf_boost = MAX(kf_boost, (int)(2 * framerate - 16));
+ if (rc->frames_since_key < framerate / 2) {
kf_boost = (int)(kf_boost * rc->frames_since_key /
- (cpi->output_framerate / 2));
+ (framerate / 2));
}
target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;
}
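
A quick worked example of the boosted key-frame target above, assuming a hypothetical 30 fps single-layer stream: kf_boost = MAX(32, 2 * 30 - 16) = 44, so once frames_since_key exceeds half the framerate the target is (16 + 44) / 16 = 3.75 times the average per-frame bandwidth.

    // Hypothetical numbers, for illustration only.
    const double framerate = 30.0;
    const int kf_boost = MAX(32, (int)(2 * framerate - 16));                 // 44
    const int target = ((16 + kf_boost) * rc->av_per_frame_bandwidth) >> 4;  // ~3.75x avg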
@@ -1280,7 +1318,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
int target = rc->av_per_frame_bandwidth;
if ((cm->current_video_frame == 0) ||
- (cm->frame_flags & FRAMEFLAGS_KEY) ||
+ (cpi->frame_flags & FRAMEFLAGS_KEY) ||
(cpi->oxcf.auto_key && (rc->frames_since_key %
cpi->key_frame_frequency == 0))) {
cm->frame_type = KEY_FRAME;
@@ -1304,7 +1342,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
int target;
if ((cm->current_video_frame == 0 ||
- (cm->frame_flags & FRAMEFLAGS_KEY) ||
+ (cpi->frame_flags & FRAMEFLAGS_KEY) ||
rc->frames_to_key == 0 ||
(cpi->oxcf.auto_key && test_for_kf_one_pass(cpi)))) {
cm->frame_type = KEY_FRAME;
@@ -1366,3 +1404,46 @@ int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
return target_index - qindex;
}
+
+void vp9_rc_update_framerate(VP9_COMP *cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const VP9_CONFIG *const oxcf = &cpi->oxcf;
+ RATE_CONTROL *const rc = &cpi->rc;
+ int vbr_max_bits;
+
+ rc->av_per_frame_bandwidth = (int)(oxcf->target_bandwidth / oxcf->framerate);
+ rc->min_frame_bandwidth = (int)(rc->av_per_frame_bandwidth *
+ oxcf->two_pass_vbrmin_section / 100);
+
+ rc->min_frame_bandwidth = MAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
+
+ // A maximum bitrate for a frame is defined.
+ // The baseline for this aligns with HW implementations that
+ // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
+ // per 16x16 MB (averaged over a frame). However, this limit is extended if
+ // a very high rate is given on the command line or the rate cannot
+ // be achieved because of a user-specified max q (e.g. when the user
+ // specifies lossless encode).
+ vbr_max_bits = (int)(((int64_t)rc->av_per_frame_bandwidth *
+ oxcf->two_pass_vbrmax_section) / 100);
+ rc->max_frame_bandwidth = MAX(MAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P),
+ vbr_max_bits);
+
+ // Set Maximum gf/arf interval
+ rc->max_gf_interval = 16;
+
+ // Extended interval for genuinely static scenes
+ rc->static_scene_max_gf_interval = cpi->key_frame_frequency >> 1;
+
+ // Special conditions when alt ref frame enabled in lagged compress mode
+ if (oxcf->play_alternate && oxcf->lag_in_frames) {
+ if (rc->max_gf_interval > oxcf->lag_in_frames - 1)
+ rc->max_gf_interval = oxcf->lag_in_frames - 1;
+
+ if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
+ rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
+ }
+
+ if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
+ rc->max_gf_interval = rc->static_scene_max_gf_interval;
+}
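
For reference, the MB-rate cap in the relocated code works out exactly to the MAXRATE_1080P constant defined at the top of this file: a 1080p frame has (1920 / 16) * (1080 / 16) = 8100 macroblocks, and 8100 * MAX_MB_RATE = 8100 * 250 = 2,025,000 bits. The VBR term depends on the configuration; the figures below assume a hypothetical 4 Mbps, 30 fps encode with two_pass_vbrmax_section = 2000.

    // Hypothetical worked example, not part of the patch.
    const int MBs = (1920 / 16) * (1080 / 16);                         // 8100
    const int mb_cap = MBs * MAX_MB_RATE;                              // 2,025,000
    const int avg_bits = 4000000 / 30;                                 // ~133,333 bits/frame
    const int vbr_max_bits = (int)(((int64_t)avg_bits * 2000) / 100);  // ~2,666,660
    const int max_frame_bw = MAX(MAX(mb_cap, MAXRATE_1080P), vbr_max_bits);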
diff --git a/vp9/encoder/vp9_ratectrl.h b/vp9/encoder/vp9_ratectrl.h
index 7693c2b13..cf6526b8b 100644
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -125,8 +125,7 @@ void vp9_rc_get_svc_params(struct VP9_COMP *cpi);
// Post encode update of the rate control parameters based
// on bytes used
-void vp9_rc_postencode_update(struct VP9_COMP *cpi,
- uint64_t bytes_used);
+void vp9_rc_postencode_update(struct VP9_COMP *cpi, uint64_t bytes_used);
// Post encode update of the rate control parameters for dropped frames
void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi);
@@ -175,6 +174,8 @@ int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget);
int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
int qindex, double rate_target_ratio);
+void vp9_rc_update_framerate(struct VP9_COMP *cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 1d70c1f9e..7ef21fa5d 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -81,7 +81,7 @@ struct rdcost_block_args {
const scan_order *so;
};
-const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
+static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{NEARESTMV, {LAST_FRAME, NONE}},
{NEARESTMV, {ALTREF_FRAME, NONE}},
{NEARESTMV, {GOLDEN_FRAME, NONE}},
@@ -121,7 +121,7 @@ const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
{D45_PRED, {INTRA_FRAME, NONE}},
};
-const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
+static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
{{LAST_FRAME, NONE}},
{{GOLDEN_FRAME, NONE}},
{{ALTREF_FRAME, NONE}},
@@ -134,8 +134,9 @@ const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for blocks size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
-static int rd_thresh_block_size_factor[BLOCK_SIZES] =
- {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
+static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
+ 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
+};
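
As the comment above notes, the table entries are quarter-units relative to the 8x8 baseline; ignoring the quantizer-dependent part of the scaling applied later in set_block_thresholds, the relative effect is:

    // Illustrative only: relative RD-threshold scaling from the table above.
    // BLOCK_4X4   -> factor 2  -> 2 / 4  = 0.5x the 8x8 threshold
    // BLOCK_8X8   -> factor 4  -> 4 / 4  = 1.0x
    // BLOCK_64X64 -> factor 32 -> 32 / 4 = 8.0x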
static int raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride) {
@@ -192,7 +193,7 @@ static void fill_token_costs(vp9_coeff_cost *c,
}
}
-static const int rd_iifactor[32] = {
+static const uint8_t rd_iifactor[32] = {
4, 4, 3, 2, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -244,6 +245,7 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
static void set_block_thresholds(VP9_COMP *cpi) {
const VP9_COMMON *const cm = &cpi->common;
+ RD_OPT *const rd = &cpi->rd;
int i, bsize, segment_id;
for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
@@ -259,14 +261,14 @@ static void set_block_thresholds(VP9_COMP *cpi) {
const int thresh_max = INT_MAX / t;
for (i = 0; i < MAX_MODES; ++i)
- cpi->rd_threshes[segment_id][bsize][i] =
- cpi->rd_thresh_mult[i] < thresh_max ? cpi->rd_thresh_mult[i] * t / 4
+ rd->threshes[segment_id][bsize][i] =
+ rd->thresh_mult[i] < thresh_max ? rd->thresh_mult[i] * t / 4
: INT_MAX;
for (i = 0; i < MAX_REFS; ++i) {
- cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
- cpi->rd_thresh_mult_sub8x8[i] < thresh_max
- ? cpi->rd_thresh_mult_sub8x8[i] * t / 4
+ rd->thresh_sub8x8[segment_id][bsize][i] =
+ rd->thresh_mult_sub8x8[i] < thresh_max
+ ? rd->thresh_mult_sub8x8[i] * t / 4
: INT_MAX;
}
}
@@ -280,10 +282,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
vp9_clear_system_state();
- cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
- cpi->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
+ cpi->rd.RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
+ cpi->rd.RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
- x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
+ x->errorperbit = cpi->rd.RDMULT / RD_MULT_EPB_RATIO;
x->errorperbit += (x->errorperbit == 0);
x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
@@ -1675,14 +1677,14 @@ static INLINE int mv_has_subpel(const MV *mv) {
static int check_best_zero_mv(
const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
- int disable_inter_mode_mask, int this_mode, int ref_frame,
- int second_ref_frame) {
+ int disable_inter_mode_mask, int this_mode,
+ const MV_REFERENCE_FRAME ref_frames[2]) {
if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
(this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
- frame_mv[this_mode][ref_frame].as_int == 0 &&
- (second_ref_frame == NONE ||
- frame_mv[this_mode][second_ref_frame].as_int == 0)) {
- int rfc = mode_context[ref_frame];
+ frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
+ (ref_frames[1] == NONE ||
+ frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
+ int rfc = mode_context[ref_frames[0]];
int c1 = cost_mv_ref(cpi, NEARMV, rfc);
int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
@@ -1693,15 +1695,15 @@ static int check_best_zero_mv(
if (c2 > c3) return 0;
} else {
assert(this_mode == ZEROMV);
- if (second_ref_frame == NONE) {
- if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0) ||
- (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0))
+ if (ref_frames[1] == NONE) {
+ if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
+ (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
return 0;
} else {
- if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frame].as_int == 0 &&
- frame_mv[NEARESTMV][second_ref_frame].as_int == 0) ||
- (c3 >= c1 && frame_mv[NEARMV][ref_frame].as_int == 0 &&
- frame_mv[NEARMV][second_ref_frame].as_int == 0))
+ if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
+ (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
+ frame_mv[NEARMV][ref_frames[1]].as_int == 0))
return 0;
}
}
@@ -1779,8 +1781,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
disable_inter_mode_mask,
- this_mode, mbmi->ref_frame[0],
- mbmi->ref_frame[1]))
+ this_mode, mbmi->ref_frame))
continue;
vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
@@ -1890,7 +1891,6 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
x->mv_row_min, x->mv_row_max);
thissme = cpi->full_search_sad(x, &mvp_full,
sadpb, 16, v_fn_ptr,
- x->nmvjointcost, x->mvcost,
&bsi->ref_mv[0]->as_mv,
&best_mv->as_mv);
if (thissme < bestsme) {
@@ -2267,7 +2267,7 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int_mv *ref_mv,
int_mv *second_ref_mv,
int64_t comp_pred_diff[REFERENCE_MODES],
- int64_t tx_size_diff[TX_MODES],
+ const int64_t tx_size_diff[TX_MODES],
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2579,7 +2579,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
struct buf_2d ref_yv12[2];
int bestsme = INT_MAX;
int sadpb = x->sadperbit16;
- int_mv tmp_mv;
+ MV tmp_mv;
int search_range = 3;
int tmp_col_min = x->mv_col_min;
@@ -2608,20 +2608,19 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
// Use mv result from single mode as mvp.
- tmp_mv.as_int = frame_mv[refs[id]].as_int;
+ tmp_mv = frame_mv[refs[id]].as_mv;
- tmp_mv.as_mv.col >>= 3;
- tmp_mv.as_mv.row >>= 3;
+ tmp_mv.col >>= 3;
+ tmp_mv.row >>= 3;
// Small-range full-pixel motion search
- bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb,
+ bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
search_range,
&cpi->fn_ptr[bsize],
- x->nmvjointcost, x->mvcost,
&ref_mv[id].as_mv, second_pred,
pw, ph);
if (bestsme < INT_MAX)
- bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv,
+ bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
second_pred, &cpi->fn_ptr[bsize], 1);
x->mv_col_min = tmp_col_min;
@@ -2633,7 +2632,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
int dis; /* TODO: use dis in distortion calculation later. */
unsigned int sse;
bestsme = cpi->find_fractional_mv_step_comp(
- x, &tmp_mv.as_mv,
+ x, &tmp_mv,
&ref_mv[id].as_mv,
cpi->common.allow_high_precision_mv,
x->errorperbit,
@@ -2648,7 +2647,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
xd->plane[0].pre[0] = scaled_first_yv12;
if (bestsme < last_besterr[id]) {
- frame_mv[refs[id]].as_int = tmp_mv.as_int;
+ frame_mv[refs[id]].as_mv = tmp_mv;
last_besterr[id] = bestsme;
} else {
break;
@@ -2698,6 +2697,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int64_t *psse,
const int64_t ref_best_rd) {
VP9_COMMON *cm = &cpi->common;
+ RD_OPT *rd_opt = &cpi->rd;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int is_comp_pred = has_second_ref(mbmi);
@@ -2795,14 +2795,13 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Search for best switchable filter by checking the variance of
// pred error irrespective of whether the filter will be used
- cpi->mask_filter_rd = 0;
+ rd_opt->mask_filter = 0;
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- cpi->rd_filter_cache[i] = INT64_MAX;
+ rd_opt->filter_cache[i] = INT64_MAX;
if (cm->interp_filter != BILINEAR) {
*best_filter = EIGHTTAP;
- if (x->source_variance <
- cpi->sf.disable_filter_search_var_thresh) {
+ if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
*best_filter = EIGHTTAP;
} else {
int newbest;
@@ -2818,12 +2817,12 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (i > 0 && intpel_mv) {
rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
- cpi->rd_filter_cache[i] = rd;
- cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
- MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+ rd_opt->filter_cache[i] = rd;
+ rd_opt->filter_cache[SWITCHABLE_FILTERS] =
+ MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
- cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
+ rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
} else {
int rate_sum = 0;
int64_t dist_sum = 0;
@@ -2843,12 +2842,12 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
- cpi->rd_filter_cache[i] = rd;
- cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
- MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
+ rd_opt->filter_cache[i] = rd;
+ rd_opt->filter_cache[SWITCHABLE_FILTERS] =
+ MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
rd += rs_rd;
- cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
+ rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
if (i == 0 && intpel_mv) {
tmp_rate_sum = rate_sum;
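As a reference point, the filter search above records one rate-distortion cost per switchable filter in filter_cache[], tracks the best signalled ("switchable") cost in the extra slot, and keeps the worst cost in mask_filter for later fallback use. The sketch below reproduces that bookkeeping with an invented rdcost() weighting and made-up rates and distortions; neither the cost form nor any constant is the actual libvpx definition.

    /* Sketch of the per-filter RD bookkeeping; rdcost() and all numbers are
     * illustrative assumptions, not the RDCOST macro or real encoder state. */
    #include <stdint.h>
    #include <stdio.h>

    #define SWITCHABLE_FILTERS 3
    #define MIN(a, b) ((a) < (b) ? (a) : (b))
    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    static int64_t rdcost(int rate, int64_t dist) {
      return (int64_t)rate * 4 + dist;     /* generic rate/distortion weighting */
    }

    int main(void) {
      int64_t filter_cache[SWITCHABLE_FILTERS + 1];  /* last slot: best switchable */
      int64_t mask_filter = 0;                       /* worst cost seen so far */
      const int rate[SWITCHABLE_FILTERS] = { 1200, 1100, 1350 };
      const int64_t dist[SWITCHABLE_FILTERS] = { 5000, 5600, 4800 };
      const int64_t rs_rd = rdcost(40, 0);           /* cost of signalling the filter */
      int i;

      for (i = 0; i <= SWITCHABLE_FILTERS; ++i)
        filter_cache[i] = INT64_MAX;

      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        int64_t rd = rdcost(rate[i], dist[i]);
        filter_cache[i] = rd;
        filter_cache[SWITCHABLE_FILTERS] =
            MIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
        rd += rs_rd;                                 /* assuming SWITCHABLE signalling */
        mask_filter = MAX(mask_filter, rd);
        printf("filter %d: rd=%lld\n", i, (long long)filter_cache[i]);
      }
      printf("best switchable=%lld, mask_filter=%lld\n",
             (long long)filter_cache[SWITCHABLE_FILTERS], (long long)mask_filter);
      return 0;
    }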
@@ -3125,6 +3124,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
VP9_COMMON *const cm = &cpi->common;
+ RD_OPT *const rd_opt = &cpi->rd;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const struct segmentation *const seg = &cm->seg;
@@ -3145,7 +3145,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
MB_MODE_INFO best_mbmode = { 0 };
- int mode_index, best_mode_index = 0;
+ int mode_index, best_mode_index = -1;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vp9_prob comp_mode_p;
int64_t best_intra_rd = INT64_MAX;
@@ -3164,8 +3164,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int best_skip2 = 0;
int mode_skip_mask = 0;
int mode_skip_start = cpi->sf.mode_skip_start + 1;
- const int *const rd_threshes = cpi->rd_threshes[segment_id][bsize];
- const int *const rd_thresh_freq_fact = cpi->rd_thresh_freq_fact[bsize];
+ const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
+ const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
const int intra_y_mode_mask =
cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
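These per-segment, per-block-size threshold tables drive the mode-pruning test used later in this loop and in the sub8x8 loop near the bottom of the file: a mode is skipped when the best cost found so far is already below threshold * freq_fact >> 5, or when the threshold is INT_MAX. A worked standalone example of that scaling follows; the neutral factor of 32 and every other number are assumptions made for illustration only.

    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      const int rd_thresh = 2500;    /* illustrative per-mode threshold */
      const int freq_fact = 40;      /* adaptive factor; 32 would leave it unscaled */
      const int64_t best_rd = 2800;  /* best cost already found for this block */

      /* Same shift-by-5 scaling as the encoder's test: 2500 * 40 >> 5 = 3125. */
      const int64_t scaled = ((int64_t)rd_thresh * freq_fact) >> 5;

      if (rd_thresh == INT_MAX || best_rd < scaled)
        printf("skip this mode: best_rd %lld < scaled threshold %lld\n",
               (long long)best_rd, (long long)scaled);
      else
        printf("evaluate this mode\n");
      return 0;
    }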
@@ -3302,7 +3302,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
- if (mode_index == mode_skip_start && best_rd < INT64_MAX) {
+ if (mode_index == mode_skip_start && best_mode_index >= 0) {
switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
case INTRA_FRAME:
break;
@@ -3339,6 +3339,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
comp_pred = second_ref_frame > INTRA_FRAME;
if (comp_pred) {
if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
+          best_mode_index >= 0 &&
vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
continue;
if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
@@ -3366,7 +3367,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// one of the neighboring directional modes
if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
(this_mode >= D45_PRED && this_mode <= TM_PRED)) {
- if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
+ if (best_mode_index >= 0 &&
+ vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
continue;
}
if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
@@ -3376,11 +3378,12 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
} else {
if (x->in_active_map &&
- !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
- disable_inter_mode_mask, this_mode, ref_frame,
- second_ref_frame))
+ disable_inter_mode_mask, this_mode, ref_frames))
continue;
+ }
}
mbmi->mode = this_mode;
@@ -3608,21 +3611,21 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
/* keep record of best filter type */
if (!mode_excluded && cm->interp_filter != BILINEAR) {
- int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
+ int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
SWITCHABLE_FILTERS : cm->interp_filter];
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
int64_t adj_rd;
if (ref == INT64_MAX)
adj_rd = 0;
- else if (cpi->rd_filter_cache[i] == INT64_MAX)
+ else if (rd_opt->filter_cache[i] == INT64_MAX)
// When early termination is triggered, the encoder does not have
// access to the rate-distortion cost. It only knows that the cost
// should be above the maximum valid value. Hence it takes the known
// maximum plus an arbitrary constant as the rate-distortion cost.
- adj_rd = cpi->mask_filter_rd - ref + 10;
+ adj_rd = rd_opt->mask_filter - ref + 10;
else
- adj_rd = cpi->rd_filter_cache[i] - ref;
+ adj_rd = rd_opt->filter_cache[i] - ref;
adj_rd += this_rd;
best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
@@ -3654,7 +3657,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
break;
}
- if (best_rd >= best_rd_so_far)
+ if (best_mode_index < 0 || best_rd >= best_rd_so_far)
return INT64_MAX;
// If we used an estimate for the uv intra rd in the loop above...
@@ -3684,7 +3687,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
// combination that wins out.
if (cpi->sf.adaptive_rd_thresh) {
for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
- int *const fact = &cpi->rd_thresh_freq_fact[bsize][mode_index];
+ int *const fact = &rd_opt->thresh_freq_fact[bsize][mode_index];
if (mode_index == best_mode_index) {
*fact -= (*fact >> 3);
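The update above makes the winning mode's threshold factor decay by one eighth each time that mode wins, so frequently chosen modes face progressively laxer pruning thresholds on later blocks (the matching growth path for losing modes is outside this hunk). A tiny standalone illustration of the decay, starting from an arbitrary factor of 32:

    #include <stdio.h>

    int main(void) {
      int fact = 32;                  /* illustrative starting frequency factor */
      int wins;
      for (wins = 1; wins <= 6; ++wins) {
        fact -= fact >> 3;            /* same decay as the hunk above */
        printf("after win %d: fact = %d\n", wins, fact);
      }
      return 0;
    }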
@@ -3756,6 +3759,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
VP9_COMMON *const cm = &cpi->common;
+ RD_OPT *const rd_opt = &cpi->rd;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const struct segmentation *const seg = &cm->seg;
@@ -3768,8 +3772,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
VP9_ALT_FLAG };
int64_t best_rd = best_rd_so_far;
int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
- int64_t best_tx_rd[TX_MODES];
- int64_t best_tx_diff[TX_MODES];
+ static const int64_t best_tx_diff[TX_MODES] = { 0 };
int64_t best_pred_diff[REFERENCE_MODES];
int64_t best_pred_rd[REFERENCE_MODES];
int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
@@ -3781,10 +3784,10 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int64_t best_inter_rd = INT64_MAX;
MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
INTERP_FILTER tmp_best_filter = SWITCHABLE;
- int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
- int64_t dist_uv[TX_SIZES];
- int skip_uv[TX_SIZES];
- MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
+ int rate_uv_intra, rate_uv_tokenonly;
+ int64_t dist_uv;
+ int skip_uv;
+ MB_PREDICTION_MODE mode_uv = DC_PRED;
int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
int_mv seg_mvs[4][MAX_REF_FRAMES];
b_mode_info best_bmodes[4];
@@ -3806,12 +3809,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
for (i = 0; i < REFERENCE_MODES; ++i)
best_pred_rd[i] = INT64_MAX;
- for (i = 0; i < TX_MODES; i++)
- best_tx_rd[i] = INT64_MAX;
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
best_filter_rd[i] = INT64_MAX;
- for (i = 0; i < TX_SIZES; i++)
- rate_uv_intra[i] = INT_MAX;
+ rate_uv_intra = INT_MAX;
*returnrate = INT_MAX;
@@ -3845,7 +3845,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int rate2 = 0, rate_y = 0, rate_uv = 0;
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
int skippable = 0;
- int64_t tx_cache[TX_MODES];
int i;
int this_skip2 = 0;
int64_t total_sse = INT_MAX;
@@ -3882,9 +3881,9 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// Test best rd so far against threshold for trying this mode.
if ((best_rd <
- ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
- cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
- cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
+ ((int64_t)rd_opt->thresh_sub8x8[segment_id][bsize][mode_index] *
+ rd_opt->thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
+ rd_opt->thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
continue;
if (ref_frame > INTRA_FRAME &&
@@ -3911,10 +3910,11 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// TODO(jingning, jkoleszar): scaling reference frame not supported for
// sub8x8 blocks.
- if (ref_frame > NONE && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
+ if (ref_frame > INTRA_FRAME &&
+ vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
continue;
- if (second_ref_frame > NONE &&
+ if (second_ref_frame > INTRA_FRAME &&
vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
continue;
@@ -3968,9 +3968,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
}
- for (i = 0; i < TX_MODES; ++i)
- tx_cache[i] = INT64_MAX;
-
#ifdef MODE_TEST_HIT_STATS
// TEST/DEBUG CODE
// Keep a record of the number of test hits at each size
@@ -3986,21 +3983,18 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
rate2 += intra_cost_penalty;
distortion2 += distortion_y;
- if (rate_uv_intra[TX_4X4] == INT_MAX) {
+ if (rate_uv_intra == INT_MAX) {
choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
- &rate_uv_intra[TX_4X4],
- &rate_uv_tokenonly[TX_4X4],
- &dist_uv[TX_4X4], &skip_uv[TX_4X4],
- &mode_uv[TX_4X4]);
+ &rate_uv_intra,
+ &rate_uv_tokenonly,
+ &dist_uv, &skip_uv,
+ &mode_uv);
}
- rate2 += rate_uv_intra[TX_4X4];
- rate_uv = rate_uv_tokenonly[TX_4X4];
- distortion2 += dist_uv[TX_4X4];
- distortion_uv = dist_uv[TX_4X4];
- mbmi->uv_mode = mode_uv[TX_4X4];
- tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- for (i = 0; i < TX_MODES; ++i)
- tx_cache[i] = tx_cache[ONLY_4X4];
+ rate2 += rate_uv_intra;
+ rate_uv = rate_uv_tokenonly;
+ distortion2 += dist_uv;
+ distortion_uv = dist_uv;
+ mbmi->uv_mode = mode_uv;
} else {
int rate;
int64_t distortion;
@@ -4019,14 +4013,13 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int uv_skippable;
this_rd_thresh = (ref_frame == LAST_FRAME) ?
- cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
- cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
+ rd_opt->thresh_sub8x8[segment_id][bsize][THR_LAST] :
+ rd_opt->thresh_sub8x8[segment_id][bsize][THR_ALTR];
this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
- cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
-
- cpi->mask_filter_rd = 0;
+ rd_opt->thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
+ rd_opt->mask_filter = 0;
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
- cpi->rd_filter_cache[i] = INT64_MAX;
+ rd_opt->filter_cache[i] = INT64_MAX;
if (cm->interp_filter != BILINEAR) {
tmp_best_filter = EIGHTTAP;
@@ -4059,14 +4052,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
continue;
rs = vp9_get_switchable_rate(x);
rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
- cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
- cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
- MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
+ rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
+ rd_opt->filter_cache[SWITCHABLE_FILTERS] =
+ MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
tmp_rd + rs_rd);
if (cm->interp_filter == SWITCHABLE)
tmp_rd += rs_rd;
- cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);
+ rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd);
newbest = (tmp_rd < tmp_best_rd);
if (newbest) {
@@ -4163,10 +4156,6 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
distortion2 += distortion_uv;
skippable = skippable && uv_skippable;
total_sse += uv_sse;
-
- tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
- for (i = 0; i < TX_MODES; ++i)
- tx_cache[i] = tx_cache[ONLY_4X4];
}
}
@@ -4304,43 +4293,26 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
/* keep record of best filter type */
if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
cm->interp_filter != BILINEAR) {
- int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
+ int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
SWITCHABLE_FILTERS : cm->interp_filter];
int64_t adj_rd;
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
if (ref == INT64_MAX)
adj_rd = 0;
- else if (cpi->rd_filter_cache[i] == INT64_MAX)
+ else if (rd_opt->filter_cache[i] == INT64_MAX)
// When early termination is triggered, the encoder does not have
// access to the rate-distortion cost. It only knows that the cost
// should be above the maximum valid value. Hence it takes the known
// maximum plus an arbitrary constant as the rate-distortion cost.
- adj_rd = cpi->mask_filter_rd - ref + 10;
+ adj_rd = rd_opt->mask_filter - ref + 10;
else
- adj_rd = cpi->rd_filter_cache[i] - ref;
+ adj_rd = rd_opt->filter_cache[i] - ref;
adj_rd += this_rd;
best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
}
}
- /* keep record of best txfm size */
- tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
- tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
- tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
- if (!mode_excluded && this_rd != INT64_MAX) {
- for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
- int64_t adj_rd = INT64_MAX;
- if (ref_frame > INTRA_FRAME)
- adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
- else
- adj_rd = this_rd;
-
- if (adj_rd < best_tx_rd[i])
- best_tx_rd[i] = adj_rd;
- }
- }
-
if (early_term)
break;
@@ -4355,14 +4327,12 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->sf.use_uv_intra_rd_estimate) {
// Do Intra UV best rd mode selection if best mode choice above was intra.
if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
- TX_SIZE uv_tx_size;
*mbmi = best_mbmode;
- uv_tx_size = get_uv_tx_size(mbmi);
- rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
- &rate_uv_tokenonly[uv_tx_size],
- &dist_uv[uv_tx_size],
- &skip_uv[uv_tx_size],
- BLOCK_8X8, uv_tx_size);
+ rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
+ &rate_uv_tokenonly,
+ &dist_uv,
+ &skip_uv,
+ BLOCK_8X8, TX_4X4);
}
}
@@ -4383,7 +4353,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// combination that wins out.
if (cpi->sf.adaptive_rd_thresh) {
for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
- int *const fact = &cpi->rd_thresh_freq_sub8x8[bsize][mode_index];
+ int *const fact = &rd_opt->thresh_freq_sub8x8[bsize][mode_index];
if (mode_index == best_mode_index) {
*fact -= (*fact >> 3);
@@ -4424,15 +4394,8 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
}
if (cm->interp_filter == SWITCHABLE)
assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
- for (i = 0; i < TX_MODES; i++) {
- if (best_tx_rd[i] == INT64_MAX)
- best_tx_diff[i] = 0;
- else
- best_tx_diff[i] = best_rd - best_tx_rd[i];
- }
} else {
vp9_zero(best_filter_diff);
- vp9_zero(best_tx_diff);
}
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
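Both mode-selection paths in this file end by folding the filter cache into best_filter_rd[]: each context's cost is taken relative to the filter that was actually coded (ref), and cache entries lost to early termination fall back to the recorded worst case plus a small constant, exactly as the in-code comments describe. The standalone sketch below replays that adjustment on invented cache contents.

    /* Sketch of the best-filter adjustment; cache contents, mask_filter and
     * this_rd are invented values, not encoder output. */
    #include <stdint.h>
    #include <stdio.h>

    #define SWITCHABLE_FILTER_CONTEXTS 4
    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void) {
      int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS] =
          { 900, 950, INT64_MAX, 905 };   /* one entry hit early termination */
      int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS] =
          { INT64_MAX, INT64_MAX, INT64_MAX, INT64_MAX };
      const int64_t mask_filter = 980;    /* worst cost recorded for this block */
      const int64_t ref = 905;            /* cost of the filter actually coded */
      const int64_t this_rd = 12000;      /* total RD cost of the current mode */
      int i;

      for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
        int64_t adj_rd;
        if (ref == INT64_MAX)
          adj_rd = 0;
        else if (filter_cache[i] == INT64_MAX)
          adj_rd = mask_filter - ref + 10;  /* early-termination fallback */
        else
          adj_rd = filter_cache[i] - ref;   /* cost relative to the coded filter */
        adj_rd += this_rd;
        best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
        printf("context %d: best_filter_rd = %lld\n",
               i, (long long)best_filter_rd[i]);
      }
      return 0;
    }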
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index d6b6174fa..f0bd8a147 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -13,32 +13,39 @@
#include "vp9/encoder/vp9_onyx_int.h"
#include "vp9/encoder/vp9_speed_features.h"
-#define ALL_INTRA_MODES ((1 << DC_PRED) | \
- (1 << V_PRED) | (1 << H_PRED) | \
- (1 << D45_PRED) | (1 << D135_PRED) | \
- (1 << D117_PRED) | (1 << D153_PRED) | \
- (1 << D207_PRED) | (1 << D63_PRED) | \
- (1 << TM_PRED))
-#define INTRA_DC_ONLY (1 << DC_PRED)
-#define INTRA_DC_TM ((1 << TM_PRED) | (1 << DC_PRED))
-#define INTRA_DC_H_V ((1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED))
-#define INTRA_DC_TM_H_V (INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED))
-
-// Masks for partially or completely disabling split mode
-#define DISABLE_ALL_INTER_SPLIT ((1 << THR_COMP_GA) | \
- (1 << THR_COMP_LA) | \
- (1 << THR_ALTR) | \
- (1 << THR_GOLD) | \
- (1 << THR_LAST))
-
-#define DISABLE_ALL_SPLIT ((1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT)
-
-#define DISABLE_COMPOUND_SPLIT ((1 << THR_COMP_GA) | (1 << THR_COMP_LA))
-
-#define LAST_AND_INTRA_SPLIT_ONLY ((1 << THR_COMP_GA) | \
- (1 << THR_COMP_LA) | \
- (1 << THR_ALTR) | \
- (1 << THR_GOLD))
+enum {
+ ALL_INTRA_MODES = (1 << DC_PRED) |
+ (1 << V_PRED) | (1 << H_PRED) |
+ (1 << D45_PRED) | (1 << D135_PRED) |
+ (1 << D117_PRED) | (1 << D153_PRED) |
+ (1 << D207_PRED) | (1 << D63_PRED) |
+ (1 << TM_PRED),
+
+ INTRA_DC_ONLY = (1 << DC_PRED),
+
+ INTRA_DC_TM = (1 << TM_PRED) | (1 << DC_PRED),
+
+ INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
+
+ INTRA_DC_TM_H_V = INTRA_DC_TM | (1 << V_PRED) | (1 << H_PRED)
+};
+
+enum {
+ DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) |
+ (1 << THR_COMP_LA) |
+ (1 << THR_ALTR) |
+ (1 << THR_GOLD) |
+ (1 << THR_LAST),
+
+ DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT,
+
+ DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA),
+
+ LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) |
+ (1 << THR_COMP_LA) |
+ (1 << THR_ALTR) |
+ (1 << THR_GOLD)
+};
static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
SPEED_FEATURES *sf, int speed) {
@@ -49,8 +56,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
if (speed >= 1) {
sf->use_square_partition_only = !frame_is_intra_only(cm);
sf->less_rectangular_check = 1;
- sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD
- : USE_LARGESTALL;
+ sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD
+ : USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720)
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -73,8 +80,8 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
}
if (speed >= 2) {
- sf->tx_size_search_method = vp9_frame_is_boosted(cpi) ? USE_FULL_RD
- : USE_LARGESTALL;
+ sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD
+ : USE_LARGESTALL;
if (MIN(cm->width, cm->height) >= 720)
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -269,9 +276,6 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->source_var_thresh = 360;
-
- sf->use_nonrd_pick_mode = 1;
- sf->search_method = FAST_DIAMOND;
}
if (speed >= 7) {
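The #define-to-enum conversion at the top of this file does not change how the masks behave; they remain bit sets indexed by mode and partition identifiers, but the enumerators now carry a compiler-visible type and show up by name in a debugger. A generic illustration of how such a mask is consumed is below; the enumerator ordering and the loop are assumptions for the example, not a libvpx call site.

    #include <stdio.h>

    /* Illustrative mode indices; the real THR_* ordering lives in the encoder. */
    enum { THR_LAST, THR_GOLD, THR_ALTR, THR_COMP_LA, THR_COMP_GA, THR_INTRA, THR_COUNT };

    enum { DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) };

    int main(void) {
      int mode;
      for (mode = 0; mode < THR_COUNT; ++mode)
        printf("mode %d: split %s\n", mode,
               (DISABLE_COMPOUND_SPLIT & (1 << mode)) ? "disabled" : "allowed");
      return 0;
    }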
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 72f548a04..55422979a 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -344,7 +344,7 @@ typedef struct {
int search_type_check_frequency;
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
- int source_var_thresh;
+ unsigned int source_var_thresh;
} SPEED_FEATURES;
struct VP9_COMP;
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index c2b6263f0..9b3fc6eab 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -178,7 +178,6 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
cpi->oxcf.starting_buffer_level = lc->starting_buffer_level;
cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level;
cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size;
- cpi->output_framerate = lc->framerate;
// Reset the frames_since_key and frames_to_key counters to their values
// before the layer restore. Keep these defined for the stream (not layer).
if (cpi->svc.number_temporal_layers > 1) {
@@ -197,7 +196,6 @@ void vp9_save_layer_context(VP9_COMP *const cpi) {
lc->starting_buffer_level = oxcf->starting_buffer_level;
lc->optimal_buffer_level = oxcf->optimal_buffer_level;
lc->maximum_buffer_size = oxcf->maximum_buffer_size;
- lc->framerate = cpi->output_framerate;
}
void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) {
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index 62e20dc00..c9e39a1a2 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -45,12 +45,6 @@ typedef void (*vp9_sad_multi_fn_t)(const uint8_t *src_ptr,
int ref_stride,
unsigned int *sad_array);
-typedef void (*vp9_sad_multi1_fn_t)(const uint8_t *src_ptr,
- int source_stride,
- const uint8_t *ref_ptr,
- int ref_stride,
- unsigned int *sad_array);
-
typedef void (*vp9_sad_multi_d_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t* const ref_ptr[],
@@ -96,7 +90,7 @@ typedef struct vp9_variance_vtable {
vp9_variance_fn_t svf_halfpix_v;
vp9_variance_fn_t svf_halfpix_hv;
vp9_sad_multi_fn_t sdx3f;
- vp9_sad_multi1_fn_t sdx8f;
+ vp9_sad_multi_fn_t sdx8f;
vp9_sad_multi_d_fn_t sdx4df;
} vp9_variance_fn_ptr_t;
diff --git a/vp9/encoder/x86/vp9_mcomp_x86.h b/vp9/encoder/x86/vp9_mcomp_x86.h
deleted file mode 100644
index c15039ad8..000000000
--- a/vp9/encoder/x86/vp9_mcomp_x86.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#ifndef VP9_ENCODER_X86_VP9_MCOMP_X86_H_
-#define VP9_ENCODER_X86_VP9_MCOMP_X86_H_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if HAVE_SSE3
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp9_search_full_search
-#define vp9_search_full_search vp9_full_search_sadx3
-
-#undef vp9_search_refining_search
-#define vp9_search_refining_search vp9_refining_search_sadx4
-
-#undef vp9_search_diamond_search
-#define vp9_search_diamond_search vp9_diamond_search_sadx4
-
-#endif
-#endif
-
-#if HAVE_SSE4_1
-#if !CONFIG_RUNTIME_CPU_DETECT
-
-#undef vp9_search_full_search
-#define vp9_search_full_search vp9_full_search_sadx8
-
-#endif
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif // VP9_ENCODER_X86_VP9_MCOMP_X86_H_
-
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index 0623ad132..967431c0f 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -168,6 +168,11 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100);
RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
+ if (cfg->rc_resize_allowed == 1) {
+ RANGE_CHECK(cfg, rc_scaled_width, 1, cfg->g_w);
+ RANGE_CHECK(cfg, rc_scaled_height, 1, cfg->g_h);
+ }
+
RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS);
RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
if (cfg->ts_number_layers > 1) {
@@ -332,6 +337,10 @@ static vpx_codec_err_t set_encoder_config(
oxcf->under_shoot_pct = cfg->rc_undershoot_pct;
oxcf->over_shoot_pct = cfg->rc_overshoot_pct;
+ oxcf->allow_spatial_resampling = cfg->rc_resize_allowed;
+ oxcf->scaled_frame_width = cfg->rc_scaled_width;
+ oxcf->scaled_frame_height = cfg->rc_scaled_height;
+
oxcf->maximum_buffer_size = cfg->rc_buf_sz;
oxcf->starting_buffer_level = cfg->rc_buf_initial_sz;
oxcf->optimal_buffer_level = cfg->rc_buf_optimal_sz;
@@ -410,6 +419,9 @@ static vpx_codec_err_t set_encoder_config(
printf("fixed_q: %d\n", oxcf->fixed_q);
printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q);
printf("best_allowed_q: %d\n", oxcf->best_allowed_q);
+ printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling);
+ printf("scaled_frame_width: %d\n", oxcf->scaled_frame_width);
+ printf("scaled_frame_height: %d\n", oxcf->scaled_frame_height);
printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias);
printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section);
printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section);
@@ -1128,6 +1140,8 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
0, // rc_dropframe_thresh
0, // rc_resize_allowed
+ 1, // rc_scaled_width
+ 1, // rc_scaled_height
60, // rc_resize_down_threshold
30, // rc_resize_up_threshold
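The new validate_config() checks tie rc_scaled_width and rc_scaled_height to the coded frame size whenever resizing is allowed. Below is a minimal sketch of how an application might populate these fields before initializing the encoder, assuming a libvpx build whose vpx_codec_enc_cfg_t already carries the two new members; error handling is trimmed.

    #include <stdio.h>
    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    int main(void) {
      vpx_codec_enc_cfg_t cfg;
      vpx_codec_ctx_t codec;

      if (vpx_codec_enc_config_default(vpx_codec_vp9_cx(), &cfg, 0))
        return 1;

      cfg.g_w = 1280;
      cfg.g_h = 720;
      cfg.rc_resize_allowed = 1;     /* enable spatial resampling */
      cfg.rc_scaled_width = 640;     /* must be in [1, g_w] per the new check */
      cfg.rc_scaled_height = 360;    /* must be in [1, g_h] per the new check */

      if (vpx_codec_enc_init(&codec, vpx_codec_vp9_cx(), &cfg, 0))
        return 1;

      /* ... feed frames with vpx_codec_encode() ... */
      vpx_codec_destroy(&codec);
      return 0;
    }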
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index da6c0f8b6..24b8d9de1 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -87,8 +87,6 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
-
-VP9_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/vp9_mcomp_x86.h
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_mmx.c
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_variance_impl_mmx.asm
VP9_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp9_sad_mmx.asm