summary | refs | log | tree | commit | diff
path: root/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'vp9')
-rw-r--r-- vp9/common/arm/neon/vp9_convolve8_avg_neon.asm 16
-rw-r--r-- vp9/common/arm/neon/vp9_convolve8_neon.asm 16
-rw-r--r-- vp9/common/vp9_entropy.c 10
-rw-r--r-- vp9/common/vp9_entropymode.c 8
-rw-r--r-- vp9/common/vp9_entropymv.c 4
-rw-r--r-- vp9/common/vp9_loopfilter.c 11
-rw-r--r-- vp9/common/vp9_loopfilter.h 7
-rw-r--r-- vp9/common/vp9_onyxc_int.h 6
-rw-r--r-- vp9/common/vp9_pred_common.h 12
-rw-r--r-- vp9/decoder/vp9_decodeframe.c 37
-rw-r--r-- vp9/decoder/vp9_decodemv.c 20
-rw-r--r-- vp9/decoder/vp9_decoder.c 16
-rw-r--r-- vp9/decoder/vp9_decoder.h 2
-rw-r--r-- vp9/decoder/vp9_detokenize.c 2
-rw-r--r-- vp9/decoder/vp9_dthread.c 69
-rw-r--r-- vp9/decoder/vp9_dthread.h 15
-rw-r--r-- vp9/encoder/vp9_bitstream.c 45
-rw-r--r-- vp9/encoder/vp9_context_tree.h 1
-rw-r--r-- vp9/encoder/vp9_encodeframe.c 741
-rw-r--r-- vp9/encoder/vp9_encodemb.c 20
-rw-r--r-- vp9/encoder/vp9_encodemb.h 10
-rw-r--r-- vp9/encoder/vp9_encodemv.c 2
-rw-r--r-- vp9/encoder/vp9_encoder.c 499
-rw-r--r-- vp9/encoder/vp9_encoder.h 21
-rw-r--r-- vp9/encoder/vp9_firstpass.c 119
-rw-r--r-- vp9/encoder/vp9_pickmode.c 127
-rw-r--r-- vp9/encoder/vp9_pickmode.h 6
-rw-r--r-- vp9/encoder/vp9_ratectrl.c 52
-rw-r--r-- vp9/encoder/vp9_rd.c 53
-rw-r--r-- vp9/encoder/vp9_rd.h 8
-rw-r--r-- vp9/encoder/vp9_rdopt.c 80
-rw-r--r-- vp9/encoder/vp9_rdopt.h 8
-rw-r--r-- vp9/encoder/vp9_speed_features.c 23
-rw-r--r-- vp9/encoder/vp9_speed_features.h 27
-rw-r--r-- vp9/encoder/vp9_tokenize.c 2
-rw-r--r-- vp9/encoder/x86/vp9_denoiser_sse2.c 332
-rw-r--r-- vp9/vp9_cx_iface.c 24
37 files changed, 1285 insertions, 1166 deletions
diff --git a/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm b/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
index 6b20cb9bf..4d85846f0 100644
--- a/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
+++ b/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
@@ -78,7 +78,7 @@
mov r10, r6 ; w loop counter
-loop_horiz_v
+vp9_convolve8_avg_loop_horiz_v
vld1.8 {d24}, [r0], r1
vld1.8 {d25}, [r0], r1
vld1.8 {d26}, [r0], r1
@@ -101,7 +101,7 @@ loop_horiz_v
add r0, r0, #3
-loop_horiz
+vp9_convolve8_avg_loop_horiz
add r5, r0, #64
vld1.32 {d28[]}, [r0], r1
@@ -170,14 +170,14 @@ loop_horiz
vmov q9, q13
subs r6, r6, #4 ; w -= 4
- bgt loop_horiz
+ bgt vp9_convolve8_avg_loop_horiz
; outer loop
mov r6, r10 ; restore w counter
add r0, r0, r9 ; src += src_stride * 4 - w
add r2, r2, r12 ; dst += dst_stride * 4 - w
subs r7, r7, #4 ; h -= 4
- bgt loop_horiz_v
+ bgt vp9_convolve8_avg_loop_horiz_v
pop {r4-r10, pc}
@@ -203,7 +203,7 @@ loop_horiz
lsl r1, r1, #1
lsl r3, r3, #1
-loop_vert_h
+vp9_convolve8_avg_loop_vert_h
mov r4, r0
add r7, r0, r1, asr #1
mov r5, r2
@@ -223,7 +223,7 @@ loop_vert_h
vmovl.u8 q10, d20
vmovl.u8 q11, d22
-loop_vert
+vp9_convolve8_avg_loop_vert
; always process a 4x4 block at a time
vld1.u32 {d24[0]}, [r7], r1
vld1.u32 {d26[0]}, [r4], r1
@@ -288,13 +288,13 @@ loop_vert
vmov d22, d25
subs r12, r12, #4 ; h -= 4
- bgt loop_vert
+ bgt vp9_convolve8_avg_loop_vert
; outer loop
add r0, r0, #4
add r2, r2, #4
subs r6, r6, #4 ; w -= 4
- bgt loop_vert_h
+ bgt vp9_convolve8_avg_loop_vert_h
pop {r4-r8, pc}
diff --git a/vp9/common/arm/neon/vp9_convolve8_neon.asm b/vp9/common/arm/neon/vp9_convolve8_neon.asm
index 45258454c..184c3ad67 100644
--- a/vp9/common/arm/neon/vp9_convolve8_neon.asm
+++ b/vp9/common/arm/neon/vp9_convolve8_neon.asm
@@ -78,7 +78,7 @@
mov r10, r6 ; w loop counter
-loop_horiz_v
+vp9_convolve8_loop_horiz_v
vld1.8 {d24}, [r0], r1
vld1.8 {d25}, [r0], r1
vld1.8 {d26}, [r0], r1
@@ -101,7 +101,7 @@ loop_horiz_v
add r0, r0, #3
-loop_horiz
+vp9_convolve8_loop_horiz
add r5, r0, #64
vld1.32 {d28[]}, [r0], r1
@@ -159,14 +159,14 @@ loop_horiz
vmov q9, q13
subs r6, r6, #4 ; w -= 4
- bgt loop_horiz
+ bgt vp9_convolve8_loop_horiz
; outer loop
mov r6, r10 ; restore w counter
add r0, r0, r9 ; src += src_stride * 4 - w
add r2, r2, r12 ; dst += dst_stride * 4 - w
subs r7, r7, #4 ; h -= 4
- bgt loop_horiz_v
+ bgt vp9_convolve8_loop_horiz_v
pop {r4-r10, pc}
@@ -192,7 +192,7 @@ loop_horiz
lsl r1, r1, #1
lsl r3, r3, #1
-loop_vert_h
+vp9_convolve8_loop_vert_h
mov r4, r0
add r7, r0, r1, asr #1
mov r5, r2
@@ -212,7 +212,7 @@ loop_vert_h
vmovl.u8 q10, d20
vmovl.u8 q11, d22
-loop_vert
+vp9_convolve8_loop_vert
; always process a 4x4 block at a time
vld1.u32 {d24[0]}, [r7], r1
vld1.u32 {d26[0]}, [r4], r1
@@ -266,13 +266,13 @@ loop_vert
vmov d22, d25
subs r12, r12, #4 ; h -= 4
- bgt loop_vert
+ bgt vp9_convolve8_loop_vert
; outer loop
add r0, r0, #4
add r2, r2, #4
subs r6, r6, #4 ; w -= 4
- bgt loop_vert_h
+ bgt vp9_convolve8_loop_vert_h
pop {r4-r8, pc}
diff --git a/vp9/common/vp9_entropy.c b/vp9/common/vp9_entropy.c
index c3fdeb48a..77a8709f0 100644
--- a/vp9/common/vp9_entropy.c
+++ b/vp9/common/vp9_entropy.c
@@ -748,10 +748,10 @@ void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full) {
}
void vp9_default_coef_probs(VP9_COMMON *cm) {
- vp9_copy(cm->fc.coef_probs[TX_4X4], default_coef_probs_4x4);
- vp9_copy(cm->fc.coef_probs[TX_8X8], default_coef_probs_8x8);
- vp9_copy(cm->fc.coef_probs[TX_16X16], default_coef_probs_16x16);
- vp9_copy(cm->fc.coef_probs[TX_32X32], default_coef_probs_32x32);
+ vp9_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4);
+ vp9_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8);
+ vp9_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16);
+ vp9_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32);
}
#define COEF_COUNT_SAT 24
@@ -765,7 +765,7 @@ static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size,
unsigned int count_sat,
unsigned int update_factor) {
const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
- vp9_coeff_probs_model *const probs = cm->fc.coef_probs[tx_size];
+ vp9_coeff_probs_model *const probs = cm->fc->coef_probs[tx_size];
const vp9_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size];
vp9_coeff_count_model *counts = cm->counts.coef[tx_size];
unsigned int (*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] =
diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 5b00b0082..1a24572ba 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -350,7 +350,7 @@ static void adapt_probs(const vp9_tree_index *tree,
void vp9_adapt_mode_probs(VP9_COMMON *cm) {
int i, j;
- FRAME_CONTEXT *fc = &cm->fc;
+ FRAME_CONTEXT *fc = cm->fc;
const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
const FRAME_COUNTS *counts = &cm->counts;
@@ -451,17 +451,17 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
lf->last_sharpness_level = -1;
vp9_default_coef_probs(cm);
- vp9_init_mode_probs(&cm->fc);
+ vp9_init_mode_probs(cm->fc);
vp9_init_mv_probs(cm);
if (cm->frame_type == KEY_FRAME ||
cm->error_resilient_mode || cm->reset_frame_context == 3) {
// Reset all frame contexts.
for (i = 0; i < FRAME_CONTEXTS; ++i)
- cm->frame_contexts[i] = cm->fc;
+ cm->frame_contexts[i] = *cm->fc;
} else if (cm->reset_frame_context == 2) {
// Reset only the frame context specified in the frame header.
- cm->frame_contexts[cm->frame_context_idx] = cm->fc;
+ cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}
if (frame_is_intra_only(cm))
diff --git a/vp9/common/vp9_entropymv.c b/vp9/common/vp9_entropymv.c
index 5bb048202..922c03947 100644
--- a/vp9/common/vp9_entropymv.c
+++ b/vp9/common/vp9_entropymv.c
@@ -196,7 +196,7 @@ static void adapt_probs(const vp9_tree_index *tree, const vp9_prob *pre_probs,
void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
int i, j;
- nmv_context *fc = &cm->fc.nmvc;
+ nmv_context *fc = &cm->fc->nmvc;
const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc;
const nmv_context_counts *counts = &cm->counts.mv;
@@ -229,5 +229,5 @@ void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) {
}
void vp9_init_mv_probs(VP9_COMMON *cm) {
- cm->fc.nmvc = default_nmv_context;
+ cm->fc->nmvc = default_nmv_context;
}
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index aca8d7b33..43a4fe5b9 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -1625,6 +1625,17 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
y_only);
}
+void vp9_loop_filter_data_reset(
+ LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
+ struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) {
+ lf_data->frame_buffer = frame_buffer;
+ lf_data->cm = cm;
+ lf_data->start = 0;
+ lf_data->stop = 0;
+ lf_data->y_only = 0;
+ vpx_memcpy(lf_data->planes, planes, sizeof(lf_data->planes));
+}
+
int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) {
(void)unused;
vp9_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 0ede58ae4..4c15e6bd4 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -124,11 +124,12 @@ typedef struct LoopFilterWorkerData {
int start;
int stop;
int y_only;
-
- struct VP9LfSyncData *lf_sync;
- int num_lf_workers;
} LFWorkerData;
+void vp9_loop_filter_data_reset(
+ LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer,
+ struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]);
+
// Operates on the rows described by 'lf_data'.
int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused);
#ifdef __cplusplus
diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h
index f1eda9117..b818ae818 100644
--- a/vp9/common/vp9_onyxc_int.h
+++ b/vp9/common/vp9_onyxc_int.h
@@ -169,8 +169,8 @@ typedef struct VP9Common {
MV_REFERENCE_FRAME comp_var_ref[2];
REFERENCE_MODE reference_mode;
- FRAME_CONTEXT fc; /* this frame entropy */
- FRAME_CONTEXT frame_contexts[FRAME_CONTEXTS];
+ FRAME_CONTEXT *fc; /* this frame entropy */
+ FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS
unsigned int frame_context_idx; /* Context to use/update */
FRAME_COUNTS counts;
@@ -261,7 +261,7 @@ static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) {
static INLINE const vp9_prob* get_partition_probs(const VP9_COMMON *cm,
int ctx) {
return frame_is_intra_only(cm) ? vp9_kf_partition_probs[ctx]
- : cm->fc.partition_prob[ctx];
+ : cm->fc->partition_prob[ctx];
}
static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
diff --git a/vp9/common/vp9_pred_common.h b/vp9/common/vp9_pred_common.h
index 39774f142..cf13e4a91 100644
--- a/vp9/common/vp9_pred_common.h
+++ b/vp9/common/vp9_pred_common.h
@@ -54,7 +54,7 @@ static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) {
static INLINE vp9_prob vp9_get_skip_prob(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
- return cm->fc.skip_probs[vp9_get_skip_context(xd)];
+ return cm->fc->skip_probs[vp9_get_skip_context(xd)];
}
int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd);
@@ -63,14 +63,14 @@ int vp9_get_intra_inter_context(const MACROBLOCKD *xd);
static INLINE vp9_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
- return cm->fc.intra_inter_prob[vp9_get_intra_inter_context(xd)];
+ return cm->fc->intra_inter_prob[vp9_get_intra_inter_context(xd)];
}
int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd);
static INLINE vp9_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
- return cm->fc.comp_inter_prob[vp9_get_reference_mode_context(cm, xd)];
+ return cm->fc->comp_inter_prob[vp9_get_reference_mode_context(cm, xd)];
}
int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
@@ -79,21 +79,21 @@ int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm,
static INLINE vp9_prob vp9_get_pred_prob_comp_ref_p(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
const int pred_context = vp9_get_pred_context_comp_ref_p(cm, xd);
- return cm->fc.comp_ref_prob[pred_context];
+ return cm->fc->comp_ref_prob[pred_context];
}
int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd);
static INLINE vp9_prob vp9_get_pred_prob_single_ref_p1(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
- return cm->fc.single_ref_prob[vp9_get_pred_context_single_ref_p1(xd)][0];
+ return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p1(xd)][0];
}
int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd);
static INLINE vp9_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
- return cm->fc.single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1];
+ return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1];
}
int vp9_get_tx_size_context(const MACROBLOCKD *xd);
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index dc712f045..66da63ac6 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -127,7 +127,7 @@ static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm,
}
static void read_frame_reference_mode_probs(VP9_COMMON *cm, vp9_reader *r) {
- FRAME_CONTEXT *const fc = &cm->fc;
+ FRAME_CONTEXT *const fc = cm->fc;
int i;
if (cm->reference_mode == REFERENCE_MODE_SELECT)
@@ -902,11 +902,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
// Be sure to sync as we might be resuming after a failed frame decode.
winterface->sync(&pbi->lf_worker);
- lf_data->frame_buffer = get_frame_new_buffer(cm);
- lf_data->cm = cm;
- vp9_copy(lf_data->planes, pbi->mb.plane);
- lf_data->stop = 0;
- lf_data->y_only = 0;
+ vp9_loop_filter_data_reset(lf_data, get_frame_new_buffer(cm), cm,
+ pbi->mb.plane);
vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
}
@@ -1065,14 +1062,19 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
// use num_threads - 1 workers.
CHECK_MEM_ERROR(cm, pbi->tile_workers,
vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
+ // Ensure tile data offsets will be properly aligned. This may fail on
+ // platforms without DECLARE_ALIGNED().
+ assert((sizeof(*pbi->tile_worker_data) % 16) == 0);
+ CHECK_MEM_ERROR(cm, pbi->tile_worker_data,
+ vpx_memalign(32, num_threads *
+ sizeof(*pbi->tile_worker_data)));
+ CHECK_MEM_ERROR(cm, pbi->tile_worker_info,
+ vpx_malloc(num_threads * sizeof(*pbi->tile_worker_info)));
for (i = 0; i < num_threads; ++i) {
VP9Worker *const worker = &pbi->tile_workers[i];
++pbi->num_tile_workers;
winterface->init(worker);
- CHECK_MEM_ERROR(cm, worker->data1,
- vpx_memalign(32, sizeof(TileWorkerData)));
- CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo)));
if (i < num_threads - 1 && !winterface->reset(worker)) {
vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
"Tile decoder thread creation failed");
@@ -1082,8 +1084,11 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
- winterface->sync(&pbi->tile_workers[n]);
- pbi->tile_workers[n].hook = (VP9WorkerHook)tile_worker_hook;
+ VP9Worker *const worker = &pbi->tile_workers[n];
+ winterface->sync(worker);
+ worker->hook = (VP9WorkerHook)tile_worker_hook;
+ worker->data1 = &pbi->tile_worker_data[n];
+ worker->data2 = &pbi->tile_worker_info[n];
}
// Note: this memset assumes above_context[0], [1] and [2]
@@ -1386,7 +1391,7 @@ static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data,
size_t partition_size) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- FRAME_CONTEXT *const fc = &cm->fc;
+ FRAME_CONTEXT *const fc = cm->fc;
vp9_reader r;
int k;
@@ -1540,7 +1545,7 @@ void vp9_decode_frame(VP9Decoder *pbi,
setup_plane_dequants(cm, xd, cm->base_qindex);
vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
- cm->fc = cm->frame_contexts[cm->frame_context_idx];
+ *cm->fc = cm->frame_contexts[cm->frame_context_idx];
vp9_zero(cm->counts);
vp9_zero(xd->dqcoeff);
@@ -1555,7 +1560,9 @@ void vp9_decode_frame(VP9Decoder *pbi,
if (!xd->corrupted) {
// If multiple threads are used to decode tiles, then we use those threads
// to do parallel loopfiltering.
- vp9_loop_filter_frame_mt(new_fb, pbi, cm, cm->lf.filter_level, 0);
+ vp9_loop_filter_frame_mt(&pbi->lf_row_sync, new_fb, pbi->mb.plane, cm,
+ pbi->tile_workers, pbi->num_tile_workers,
+ cm->lf.filter_level, 0);
}
} else {
*p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
@@ -1580,5 +1587,5 @@ void vp9_decode_frame(VP9Decoder *pbi,
}
if (cm->refresh_frame_context)
- cm->frame_contexts[cm->frame_context_idx] = cm->fc;
+ cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}
diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c
index a01fe842e..d0e0b76da 100644
--- a/vp9/decoder/vp9_decodemv.c
+++ b/vp9/decoder/vp9_decodemv.c
@@ -30,7 +30,7 @@ static PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, vp9_reader *r,
int size_group) {
const PREDICTION_MODE y_mode =
- read_intra_mode(r, cm->fc.y_mode_prob[size_group]);
+ read_intra_mode(r, cm->fc->y_mode_prob[size_group]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.y_mode[size_group][y_mode];
return y_mode;
@@ -39,7 +39,7 @@ static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, vp9_reader *r,
static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, vp9_reader *r,
PREDICTION_MODE y_mode) {
const PREDICTION_MODE uv_mode = read_intra_mode(r,
- cm->fc.uv_mode_prob[y_mode]);
+ cm->fc->uv_mode_prob[y_mode]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.uv_mode[y_mode][uv_mode];
return uv_mode;
@@ -47,7 +47,7 @@ static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, vp9_reader *r,
static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r, int ctx) {
const int mode = vp9_read_tree(r, vp9_inter_mode_tree,
- cm->fc.inter_mode_probs[ctx]);
+ cm->fc->inter_mode_probs[ctx]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.inter_mode[ctx][mode];
@@ -61,7 +61,7 @@ static int read_segment_id(vp9_reader *r, const struct segmentation *seg) {
static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
TX_SIZE max_tx_size, vp9_reader *r) {
const int ctx = vp9_get_tx_size_context(xd);
- const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc.tx_probs);
+ const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs);
int tx_size = vp9_read(r, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
tx_size += vp9_read(r, tx_probs[1]);
@@ -150,7 +150,7 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
return 1;
} else {
const int ctx = vp9_get_skip_context(xd);
- const int skip = vp9_read(r, cm->fc.skip_probs[ctx]);
+ const int skip = vp9_read(r, cm->fc->skip_probs[ctx]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.skip[ctx][skip];
return skip;
@@ -258,7 +258,7 @@ static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm,
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
const int ctx = vp9_get_reference_mode_context(cm, xd);
const REFERENCE_MODE mode =
- (REFERENCE_MODE)vp9_read(r, cm->fc.comp_inter_prob[ctx]);
+ (REFERENCE_MODE)vp9_read(r, cm->fc->comp_inter_prob[ctx]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.comp_inter[ctx][mode];
return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE
@@ -271,7 +271,7 @@ static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm,
static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
vp9_reader *r,
int segment_id, MV_REFERENCE_FRAME ref_frame[2]) {
- FRAME_CONTEXT *const fc = &cm->fc;
+ FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *const counts = &cm->counts;
if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
@@ -317,7 +317,7 @@ static INLINE INTERP_FILTER read_switchable_interp_filter(
const int ctx = vp9_get_pred_context_switchable_interp(xd);
const INTERP_FILTER type =
(INTERP_FILTER)vp9_read_tree(r, vp9_switchable_interp_tree,
- cm->fc.switchable_interp_prob[ctx]);
+ cm->fc->switchable_interp_prob[ctx]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.switchable_interp[ctx][type];
return type;
@@ -372,7 +372,7 @@ static INLINE int assign_mv(VP9_COMMON *cm, PREDICTION_MODE mode,
nmv_context_counts *const mv_counts = cm->frame_parallel_decoding_mode ?
NULL : &cm->counts.mv;
for (i = 0; i < 1 + is_compound; ++i) {
- read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc.nmvc, mv_counts,
+ read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
allow_hp);
ret = ret && is_mv_valid(&mv[i].as_mv);
}
@@ -410,7 +410,7 @@ static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
INTRA_FRAME;
} else {
const int ctx = vp9_get_intra_inter_context(xd);
- const int is_inter = vp9_read(r, cm->fc.intra_inter_prob[ctx]);
+ const int is_inter = vp9_read(r, cm->fc->intra_inter_prob[ctx]);
if (!cm->frame_parallel_decoding_mode)
++cm->counts.intra_inter[ctx][is_inter];
return is_inter;
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index baf6ab7ef..fa2f01041 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -59,6 +59,13 @@ VP9Decoder *vp9_decoder_create() {
}
cm->error.setjmp = 1;
+
+ CHECK_MEM_ERROR(cm, cm->fc,
+ (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
+ CHECK_MEM_ERROR(cm, cm->frame_contexts,
+ (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS,
+ sizeof(*cm->frame_contexts)));
+
pbi->need_resync = 1;
initialize_dec();
@@ -88,15 +95,20 @@ void vp9_decoder_remove(VP9Decoder *pbi) {
VP9_COMMON *const cm = &pbi->common;
int i;
+ vpx_free(cm->fc);
+ cm->fc = NULL;
+ vpx_free(cm->frame_contexts);
+ cm->frame_contexts = NULL;
+
vp9_get_worker_interface()->end(&pbi->lf_worker);
vpx_free(pbi->lf_worker.data1);
vpx_free(pbi->tile_data);
for (i = 0; i < pbi->num_tile_workers; ++i) {
VP9Worker *const worker = &pbi->tile_workers[i];
vp9_get_worker_interface()->end(worker);
- vpx_free(worker->data1);
- vpx_free(worker->data2);
}
+ vpx_free(pbi->tile_worker_data);
+ vpx_free(pbi->tile_worker_info);
vpx_free(pbi->tile_workers);
if (pbi->num_tile_workers > 0) {
diff --git a/vp9/decoder/vp9_decoder.h b/vp9/decoder/vp9_decoder.h
index 4f52bb9c4..25b7339ed 100644
--- a/vp9/decoder/vp9_decoder.h
+++ b/vp9/decoder/vp9_decoder.h
@@ -46,6 +46,8 @@ typedef struct VP9Decoder {
VP9Worker lf_worker;
VP9Worker *tile_workers;
+ TileWorkerData *tile_worker_data;
+ TileInfo *tile_worker_info;
int num_tile_workers;
TileData *tile_data;
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c
index 421229a28..8704fddac 100644
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -58,7 +58,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type,
int ctx, const int16_t *scan, const int16_t *nb,
vp9_reader *r) {
const int max_eob = 16 << (tx_size << 1);
- const FRAME_CONTEXT *const fc = &cm->fc;
+ const FRAME_CONTEXT *const fc = cm->fc;
FRAME_COUNTS *const counts = &cm->counts;
const int ref = is_inter_block(&xd->mi[0].src_mi->mbmi);
int band, c = 0;
diff --git a/vp9/decoder/vp9_dthread.c b/vp9/decoder/vp9_dthread.c
index 69e4fde85..3d2d0dd2e 100644
--- a/vp9/decoder/vp9_dthread.c
+++ b/vp9/decoder/vp9_dthread.c
@@ -92,12 +92,12 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
VP9_COMMON *const cm,
struct macroblockd_plane planes[MAX_MB_PLANE],
int start, int stop, int y_only,
- VP9LfSync *const lf_sync, int num_lf_workers) {
+ VP9LfSync *const lf_sync) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int r, c; // SB row and col
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
- for (r = start; r < stop; r += num_lf_workers) {
+ for (r = start; r < stop; r += lf_sync->num_workers) {
const int mi_row = r << MI_BLOCK_SIZE_LOG2;
MODE_INFO *const mi = cm->mi + mi_row * cm->mi_stride;
@@ -121,35 +121,35 @@ static void loop_filter_rows_mt(const YV12_BUFFER_CONFIG *const frame_buffer,
}
// Row-based multi-threaded loopfilter hook
-static int loop_filter_row_worker(TileWorkerData *const tile_data,
- void *unused) {
- LFWorkerData *const lf_data = &tile_data->lfdata;
- (void)unused;
+static int loop_filter_row_worker(VP9LfSync *const lf_sync,
+ LFWorkerData *const lf_data) {
loop_filter_rows_mt(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
- lf_data->start, lf_data->stop, lf_data->y_only,
- lf_data->lf_sync, lf_data->num_lf_workers);
+ lf_data->start, lf_data->stop, lf_data->y_only, lf_sync);
return 1;
}
// VP9 decoder: Implement multi-threaded loopfilter that uses the tile
// threads.
-void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
- VP9Decoder *pbi, VP9_COMMON *cm,
+void vp9_loop_filter_frame_mt(VP9LfSync *lf_sync,
+ YV12_BUFFER_CONFIG *frame,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
+ VP9_COMMON *cm,
+ VP9Worker *workers, int nworkers,
int frame_filter_level,
int y_only) {
- VP9LfSync *const lf_sync = &pbi->lf_row_sync;
const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
// Number of superblock rows and cols
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
const int tile_cols = 1 << cm->log2_tile_cols;
- const int num_workers = MIN(pbi->max_threads & ~1, tile_cols);
+ const int num_workers = MIN(nworkers, tile_cols);
int i;
if (!frame_filter_level) return;
- if (!lf_sync->sync_range || cm->last_height != cm->height) {
+ if (!lf_sync->sync_range || cm->last_height != cm->height ||
+ num_workers > lf_sync->num_workers) {
vp9_loop_filter_dealloc(lf_sync);
- vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width);
+ vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
}
vp9_loop_filter_frame_init(cm, frame_filter_level);
@@ -158,32 +158,26 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
// Set up loopfilter thread data.
- // The decoder is using num_workers instead of pbi->num_tile_workers
- // because it has been observed that using more threads on the
- // loopfilter, than there are tile columns in the frame will hurt
- // performance on Android. This is because the system will only
- // schedule the tile decode workers on cores equal to the number
- // of tile columns. Then if the decoder tries to use more threads for the
- // loopfilter, it will hurt performance because of contention. If the
- // multithreading code changes in the future then the number of workers
- // used by the loopfilter should be revisited.
+ // The decoder is capping num_workers because it has been observed that using
+ // more threads on the loopfilter than there are cores will hurt performance
+ // on Android. This is because the system will only schedule the tile decode
+ // workers on cores equal to the number of tile columns. Then if the decoder
+ // tries to use more threads for the loopfilter, it will hurt performance
+ // because of contention. If the multithreading code changes in the future
+ // then the number of workers used by the loopfilter should be revisited.
for (i = 0; i < num_workers; ++i) {
- VP9Worker *const worker = &pbi->tile_workers[i];
- TileWorkerData *const tile_data = (TileWorkerData*)worker->data1;
- LFWorkerData *const lf_data = &tile_data->lfdata;
+ VP9Worker *const worker = &workers[i];
+ LFWorkerData *const lf_data = &lf_sync->lfdata[i];
worker->hook = (VP9WorkerHook)loop_filter_row_worker;
+ worker->data1 = lf_sync;
+ worker->data2 = lf_data;
// Loopfilter data
- lf_data->frame_buffer = frame;
- lf_data->cm = cm;
- vp9_copy(lf_data->planes, pbi->mb.plane);
+ vp9_loop_filter_data_reset(lf_data, frame, cm, planes);
lf_data->start = i;
lf_data->stop = sb_rows;
- lf_data->y_only = y_only; // always do all planes in decoder
-
- lf_data->lf_sync = lf_sync;
- lf_data->num_lf_workers = num_workers;
+ lf_data->y_only = y_only;
// Start loopfiltering
if (i == num_workers - 1) {
@@ -195,7 +189,7 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
// Wait till all rows are finished
for (i = 0; i < num_workers; ++i) {
- winterface->sync(&pbi->tile_workers[i]);
+ winterface->sync(&workers[i]);
}
}
@@ -215,7 +209,7 @@ static int get_sync_range(int width) {
// Allocate memory for lf row synchronization
void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
- int width) {
+ int width, int num_workers) {
lf_sync->rows = rows;
#if CONFIG_MULTITHREAD
{
@@ -239,6 +233,10 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
}
#endif // CONFIG_MULTITHREAD
+ CHECK_MEM_ERROR(cm, lf_sync->lfdata,
+ vpx_malloc(num_workers * sizeof(*lf_sync->lfdata)));
+ lf_sync->num_workers = num_workers;
+
CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
@@ -265,6 +263,7 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
vpx_free(lf_sync->cond_);
}
#endif // CONFIG_MULTITHREAD
+ vpx_free(lf_sync->lfdata);
vpx_free(lf_sync->cur_sb_col);
// clear the structure as the source of this call may be a resize in which
// case this call will be followed by an _alloc() which may fail.
diff --git a/vp9/decoder/vp9_dthread.h b/vp9/decoder/vp9_dthread.h
index b1fbdeb74..d5810b45b 100644
--- a/vp9/decoder/vp9_dthread.h
+++ b/vp9/decoder/vp9_dthread.h
@@ -22,9 +22,6 @@ typedef struct TileWorkerData {
struct VP9Common *cm;
vp9_reader bit_reader;
DECLARE_ALIGNED(16, struct macroblockd, xd);
-
- // Row-based parallel loopfilter data
- LFWorkerData lfdata;
} TileWorkerData;
// Loopfilter row synchronization
@@ -39,19 +36,25 @@ typedef struct VP9LfSyncData {
// determined by testing. Currently, it is chosen to be a power-of-2 number.
int sync_range;
int rows;
+
+ // Row-based parallel loopfilter data
+ LFWorkerData *lfdata;
+ int num_workers;
} VP9LfSync;
// Allocate memory for loopfilter row synchronization.
void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
- int width);
+ int width, int num_workers);
// Deallocate loopfilter synchronization related mutex and data.
void vp9_loop_filter_dealloc(VP9LfSync *lf_sync);
// Multi-threaded loopfilter that uses the tile threads.
-void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
- struct VP9Decoder *pbi,
+void vp9_loop_filter_frame_mt(VP9LfSync *lf_sync,
+ YV12_BUFFER_CONFIG *frame,
+ struct macroblockd_plane planes[MAX_MB_PLANE],
struct VP9Common *cm,
+ VP9Worker *workers, int num_workers,
int frame_filter_level,
int y_only);
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 421e04969..7cfd14307 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -84,7 +84,7 @@ static void write_selected_tx_size(const VP9_COMMON *cm,
vp9_writer *w) {
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
const vp9_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
- &cm->fc.tx_probs);
+ &cm->fc->tx_probs);
vp9_write(w, tx_size != TX_4X4, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
vp9_write(w, tx_size != TX_8X8, tx_probs[1]);
@@ -108,14 +108,14 @@ static void update_skip_probs(VP9_COMMON *cm, vp9_writer *w) {
int k;
for (k = 0; k < SKIP_CONTEXTS; ++k)
- vp9_cond_prob_diff_update(w, &cm->fc.skip_probs[k], cm->counts.skip[k]);
+ vp9_cond_prob_diff_update(w, &cm->fc->skip_probs[k], cm->counts.skip[k]);
}
static void update_switchable_interp_probs(VP9_COMMON *cm, vp9_writer *w) {
int j;
for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
prob_diff_update(vp9_switchable_interp_tree,
- cm->fc.switchable_interp_prob[j],
+ cm->fc->switchable_interp_prob[j],
cm->counts.switchable_interp[j], SWITCHABLE_FILTERS, w);
}
@@ -237,7 +237,7 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
vp9_writer *w) {
VP9_COMMON *const cm = &cpi->common;
- const nmv_context *nmvc = &cm->fc.nmvc;
+ const nmv_context *nmvc = &cm->fc->nmvc;
const MACROBLOCK *const x = &cpi->mb;
const MACROBLOCKD *const xd = &x->e_mbd;
const struct segmentation *const seg = &cm->seg;
@@ -275,7 +275,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
if (!is_inter) {
if (bsize >= BLOCK_8X8) {
- write_intra_mode(w, mode, cm->fc.y_mode_prob[size_group_lookup[bsize]]);
+ write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]);
} else {
int idx, idy;
const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
@@ -283,14 +283,14 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
const PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode;
- write_intra_mode(w, b_mode, cm->fc.y_mode_prob[0]);
+ write_intra_mode(w, b_mode, cm->fc->y_mode_prob[0]);
}
}
}
- write_intra_mode(w, mbmi->uv_mode, cm->fc.uv_mode_prob[mode]);
+ write_intra_mode(w, mbmi->uv_mode, cm->fc->uv_mode_prob[mode]);
} else {
const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
- const vp9_prob *const inter_probs = cm->fc.inter_mode_probs[mode_ctx];
+ const vp9_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx];
write_ref_frames(cm, xd, w);
// If segment skip is not enabled code the mode.
@@ -304,7 +304,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
if (cm->interp_filter == SWITCHABLE) {
const int ctx = vp9_get_pred_context_switchable_interp(xd);
vp9_write_token(w, vp9_switchable_interp_tree,
- cm->fc.switchable_interp_prob[ctx],
+ cm->fc->switchable_interp_prob[ctx],
&switchable_interp_encodings[mbmi->interp_filter]);
++cpi->interp_filter_selected[0][mbmi->interp_filter];
} else {
@@ -528,7 +528,7 @@ static void update_coef_probs_common(vp9_writer* const bc, VP9_COMP *cpi,
TX_SIZE tx_size,
vp9_coeff_stats *frame_branch_ct,
vp9_coeff_probs_model *new_coef_probs) {
- vp9_coeff_probs_model *old_coef_probs = cpi->common.fc.coef_probs[tx_size];
+ vp9_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size];
const vp9_prob upd = DIFF_UPDATE_PROB;
const int entropy_nodes_update = UNCONSTRAINED_NODES;
int i, j, k, l, t;
@@ -830,20 +830,20 @@ static void encode_txfm_probs(VP9_COMMON *cm, vp9_writer *w) {
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i], ct_8x8p);
for (j = 0; j < TX_SIZES - 3; j++)
- vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p8x8[i][j], ct_8x8p[j]);
+ vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p8x8[i][j], ct_8x8p[j]);
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i], ct_16x16p);
for (j = 0; j < TX_SIZES - 2; j++)
- vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p16x16[i][j],
+ vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p16x16[i][j],
ct_16x16p[j]);
}
for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p);
for (j = 0; j < TX_SIZES - 1; j++)
- vp9_cond_prob_diff_update(w, &cm->fc.tx_probs.p32x32[i][j],
+ vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p32x32[i][j],
ct_32x32p[j]);
}
}
@@ -929,13 +929,11 @@ static int get_refresh_mask(VP9_COMP *cpi) {
static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
VP9_COMMON *const cm = &cpi->common;
vp9_writer residual_bc;
-
int tile_row, tile_col;
TOKENEXTRA *tok[4][1 << 6], *tok_end;
size_t total_size = 0;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
- TileInfo tile[4][1 << 6];
TOKENEXTRA *pre_tok = cpi->tok;
int tile_tok = 0;
@@ -944,18 +942,16 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
-
+ int tile_idx = tile_row * tile_cols + tile_col;
tok[tile_row][tile_col] = pre_tok + tile_tok;
pre_tok = tok[tile_row][tile_col];
- tile_tok = allocated_tokens(tile[tile_row][tile_col]);
+ tile_tok = allocated_tokens(cpi->tile_data[tile_idx].tile_info);
}
}
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
- const TileInfo * const ptile = &tile[tile_row][tile_col];
-
+ int tile_idx = tile_row * tile_cols + tile_col;
tok_end = tok[tile_row][tile_col] + cpi->tok_count[tile_row][tile_col];
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1)
@@ -963,7 +959,8 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
else
vp9_start_encode(&residual_bc, data_ptr + total_size);
- write_modes(cpi, ptile, &residual_bc, &tok[tile_row][tile_col], tok_end);
+ write_modes(cpi, &cpi->tile_data[tile_idx].tile_info,
+ &residual_bc, &tok[tile_row][tile_col], tok_end);
assert(tok[tile_row][tile_col] == tok_end);
vp9_stop_encode(&residual_bc);
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
@@ -1161,7 +1158,7 @@ static void write_uncompressed_header(VP9_COMP *cpi,
static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
- FRAME_CONTEXT *const fc = &cm->fc;
+ FRAME_CONTEXT *const fc = cm->fc;
vp9_writer header_bc;
vp9_start_encode(&header_bc, data);
@@ -1178,7 +1175,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
int i;
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
- prob_diff_update(vp9_inter_mode_tree, cm->fc.inter_mode_probs[i],
+ prob_diff_update(vp9_inter_mode_tree, cm->fc->inter_mode_probs[i],
cm->counts.inter_mode[i], INTER_MODES, &header_bc);
vp9_zero(cm->counts.inter_mode);
@@ -1219,7 +1216,7 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) {
cm->counts.comp_ref[i]);
for (i = 0; i < BLOCK_SIZE_GROUPS; ++i)
- prob_diff_update(vp9_intra_mode_tree, cm->fc.y_mode_prob[i],
+ prob_diff_update(vp9_intra_mode_tree, cm->fc->y_mode_prob[i],
cm->counts.y_mode[i], INTRA_MODES, &header_bc);
for (i = 0; i < PARTITION_CONTEXTS; ++i)
diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h
index 6b28ee591..47d9580a8 100644
--- a/vp9/encoder/vp9_context_tree.h
+++ b/vp9/encoder/vp9_context_tree.h
@@ -34,6 +34,7 @@ typedef struct {
int is_coded;
int num_4x4_blk;
int skip;
+ int pred_pixel_ready;
// For current partition, only if all Y, U, and V transform blocks'
// coefficients are quantized to 0, skippable is set to 0.
int skippable;
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index f4e71aeb3..baa4908d4 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -467,7 +467,6 @@ static void choose_partitioning(VP9_COMP *cpi,
int sp;
int dp;
int pixels_wide = 64, pixels_high = 64;
- int_mv nearest_mv, near_mv;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
@@ -488,11 +487,7 @@ static void choose_partitioning(VP9_COMP *cpi,
xd->mi[0].src_mi->mbmi.ref_frame[0] = LAST_FRAME;
xd->mi[0].src_mi->mbmi.sb_type = BLOCK_64X64;
- vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv,
- xd->mi[0].src_mi->mbmi.ref_mvs[LAST_FRAME],
- &nearest_mv, &near_mv);
-
- xd->mi[0].src_mi->mbmi.mv[0] = nearest_mv;
+ xd->mi[0].src_mi->mbmi.mv[0].as_int = 0;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64);
d = xd->plane[0].dst.buf;
@@ -750,8 +745,8 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
x->e_mbd.plane[i].subsampling_y);
}
-static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, int *rate,
- int64_t *dist, BLOCK_SIZE bsize) {
+static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
+ RD_COST *rd_cost, BLOCK_SIZE bsize) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
INTERP_FILTER filter_ref;
@@ -777,15 +772,16 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, int *rate,
xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = 0;
x->skip = 1;
- *rate = 0;
- *dist = 0;
+ vp9_rd_cost_init(rd_cost);
}
-static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
+static void rd_pick_sb_modes(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd) {
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi;
@@ -801,7 +797,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
// Use the lower precision, but faster, 32x32 fdct for mode selection.
x->use_lp32x32fdct = 1;
- set_offsets(cpi, tile, mi_row, mi_col, bsize);
+ set_offsets(cpi, tile_info, mi_row, mi_col, bsize);
mbmi = &xd->mi[0].src_mi->mbmi;
mbmi->sb_type = bsize;
@@ -813,6 +809,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
}
ctx->is_coded = 0;
ctx->skippable = 0;
+ ctx->pred_pixel_ready = 0;
x->skip_recode = 0;
// Set to zero to make sure we do not use the previous encoded frame stats
@@ -872,14 +869,14 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
} else {
if (bsize >= BLOCK_8X8) {
if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
- vp9_rd_pick_inter_mode_sb_seg_skip(cpi, x, rd_cost, bsize,
+ vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
ctx, best_rd);
else
- vp9_rd_pick_inter_mode_sb(cpi, x, tile, mi_row, mi_col,
+ vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col,
rd_cost, bsize, ctx, best_rd);
} else {
- vp9_rd_pick_inter_mode_sub8x8(cpi, x, tile, mi_row, mi_col, rd_cost,
- bsize, ctx, best_rd);
+ vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col,
+ rd_cost, bsize, ctx, best_rd);
}
}
@@ -1161,79 +1158,6 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
}
-static void copy_partitioning(VP9_COMMON *cm, MODE_INFO *mi_8x8,
- MODE_INFO *prev_mi_8x8) {
- const int mis = cm->mi_stride;
- int block_row, block_col;
-
- for (block_row = 0; block_row < 8; ++block_row) {
- for (block_col = 0; block_col < 8; ++block_col) {
- MODE_INFO *const prev_mi =
- prev_mi_8x8[block_row * mis + block_col].src_mi;
- const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
-
- if (prev_mi) {
- const ptrdiff_t offset = prev_mi - cm->prev_mi;
- mi_8x8[block_row * mis + block_col].src_mi = cm->mi + offset;
- mi_8x8[block_row * mis + block_col].src_mi->mbmi.sb_type = sb_type;
- }
- }
- }
-}
-
-static void constrain_copy_partitioning(VP9_COMP *const cpi,
- const TileInfo *const tile,
- MODE_INFO *mi_8x8,
- MODE_INFO *prev_mi_8x8,
- int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- VP9_COMMON *const cm = &cpi->common;
- const int mis = cm->mi_stride;
- const int row8x8_remaining = tile->mi_row_end - mi_row;
- const int col8x8_remaining = tile->mi_col_end - mi_col;
- MODE_INFO *const mi_upper_left = cm->mi + mi_row * mis + mi_col;
- const int bh = num_8x8_blocks_high_lookup[bsize];
- const int bw = num_8x8_blocks_wide_lookup[bsize];
- int block_row, block_col;
-
- assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
-
- // If the SB64 if it is all "in image".
- if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
- (row8x8_remaining >= MI_BLOCK_SIZE)) {
- for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
- for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
- const int index = block_row * mis + block_col;
- MODE_INFO *prev_mi = prev_mi_8x8[index].src_mi;
- const BLOCK_SIZE sb_type = prev_mi ? prev_mi->mbmi.sb_type : 0;
- // Use previous partition if block size is not larger than bsize.
- if (prev_mi && sb_type <= bsize) {
- int block_row2, block_col2;
- for (block_row2 = 0; block_row2 < bh; ++block_row2) {
- for (block_col2 = 0; block_col2 < bw; ++block_col2) {
- const int index2 = (block_row + block_row2) * mis +
- block_col + block_col2;
- prev_mi = prev_mi_8x8[index2].src_mi;
- if (prev_mi) {
- const ptrdiff_t offset = prev_mi - cm->prev_mi;
- mi_8x8[index2].src_mi = cm->mi + offset;
- mi_8x8[index2].src_mi->mbmi.sb_type = prev_mi->mbmi.sb_type;
- }
- }
- }
- } else {
- // Otherwise, use fixed partition of size bsize.
- mi_8x8[index].src_mi = mi_upper_left + index;
- mi_8x8[index].src_mi->mbmi.sb_type = bsize;
- }
- }
- }
- } else {
- // Else this is a partial SB64, copy previous partition.
- copy_partitioning(cm, mi_8x8, prev_mi_8x8);
- }
-}
-
const struct {
int row;
int col;
@@ -1364,27 +1288,6 @@ static int is_background(const VP9_COMP *cpi, const TileInfo *const tile,
return this_sad < 2 * threshold;
}
-static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO *prev_mi_8x8,
- const int motion_thresh) {
- const int mis = cm->mi_stride;
- int block_row, block_col;
-
- if (cm->prev_mi) {
- for (block_row = 0; block_row < 8; ++block_row) {
- for (block_col = 0; block_col < 8; ++block_col) {
- const MODE_INFO *prev_mi =
- prev_mi_8x8[block_row * mis + block_col].src_mi;
- if (prev_mi) {
- if (abs(prev_mi->mbmi.mv[0].as_mv.row) > motion_thresh ||
- abs(prev_mi->mbmi.mv[0].as_mv.col) > motion_thresh)
- return 1;
- }
- }
- }
- }
- return 0;
-}
-
static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
int mi_row, int mi_col, int bsize) {
VP9_COMMON *const cm = &cpi->common;
@@ -1516,12 +1419,15 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile,
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
-static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
+static void rd_use_partition(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
MODE_INFO *mi_8x8, TOKENEXTRA **tp,
int mi_row, int mi_col,
- BLOCK_SIZE bsize, int *rate, int64_t *dist,
+ BLOCK_SIZE bsize,
+ int *rate, int64_t *dist,
int do_recon, PC_TREE *pc_tree) {
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int mis = cm->mi_stride;
@@ -1557,7 +1463,7 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) {
- set_offsets(cpi, tile, mi_row, mi_col, bsize);
+ set_offsets(cpi, tile_info, mi_row, mi_col, bsize);
x->mb_energy = vp9_block_energy(cpi, x, bsize);
}
@@ -1583,7 +1489,7 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
mi_row + (mi_step >> 1) < cm->mi_rows &&
mi_col + (mi_step >> 1) < cm->mi_cols) {
pc_tree->partitioning = PARTITION_NONE;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &none_rdc, bsize,
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &none_rdc, bsize,
ctx, INT64_MAX);
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -1602,11 +1508,11 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
switch (partition) {
case PARTITION_NONE:
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &last_part_rdc,
bsize, ctx, INT64_MAX);
break;
case PARTITION_HORZ:
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &last_part_rdc,
subsize, &pc_tree->horizontal[0],
INT64_MAX);
if (last_part_rdc.rate != INT_MAX &&
@@ -1616,7 +1522,8 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
- rd_pick_sb_modes(cpi, tile, mi_row + (mi_step >> 1), mi_col, &tmp_rdc,
+ rd_pick_sb_modes(cpi, tile_data,
+ mi_row + (mi_step >> 1), mi_col, &tmp_rdc,
subsize, &pc_tree->horizontal[1], INT64_MAX);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
vp9_rd_cost_reset(&last_part_rdc);
@@ -1628,7 +1535,7 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
}
break;
case PARTITION_VERT:
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &last_part_rdc,
subsize, &pc_tree->vertical[0], INT64_MAX);
if (last_part_rdc.rate != INT_MAX &&
bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) {
@@ -1637,7 +1544,8 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx);
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col + (mi_step >> 1), &tmp_rdc,
+ rd_pick_sb_modes(cpi, tile_data,
+ mi_row, mi_col + (mi_step >> 1), &tmp_rdc,
subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
INT64_MAX);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
@@ -1651,7 +1559,7 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
break;
case PARTITION_SPLIT:
if (bsize == BLOCK_8X8) {
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &last_part_rdc,
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &last_part_rdc,
subsize, pc_tree->leaf_split[0], INT64_MAX);
break;
}
@@ -1667,7 +1575,8 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
continue;
vp9_rd_cost_init(&tmp_rdc);
- rd_use_partition(cpi, tile, mi_8x8 + jj * bss * mis + ii * bss, tp,
+ rd_use_partition(cpi, tile_data,
+ mi_8x8 + jj * bss * mis + ii * bss, tp,
mi_row + y_idx, mi_col + x_idx, subsize,
&tmp_rdc.rate, &tmp_rdc.dist,
i != 3, pc_tree->split[i]);
@@ -1718,7 +1627,8 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
pc_tree->split[i]->partitioning = PARTITION_NONE;
- rd_pick_sb_modes(cpi, tile, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
+ rd_pick_sb_modes(cpi, tile_data,
+ mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
split_subsize, &pc_tree->split[i]->none, INT64_MAX);
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -1732,7 +1642,7 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
chosen_rdc.dist += tmp_rdc.dist;
if (i != 3)
- encode_sb(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, 0,
+ encode_sb(cpi, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0,
split_subsize, pc_tree->split[i]);
pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
@@ -1782,7 +1692,7 @@ static void rd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
chosen_rdc.rate, chosen_rdc.dist);
- encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize,
+ encode_sb(cpi, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
pc_tree);
}
@@ -2115,11 +2025,13 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
-static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
+static void rd_pick_partition(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
TOKENEXTRA **tp, int mi_row, int mi_col,
BLOCK_SIZE bsize, RD_COST *rd_cost,
int64_t best_rd, PC_TREE *pc_tree) {
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
@@ -2162,7 +2074,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
vp9_rd_cost_reset(&best_rdc);
best_rdc.rdcost = best_rd;
- set_offsets(cpi, tile, mi_row, mi_col, bsize);
+ set_offsets(cpi, tile_info, mi_row, mi_col, bsize);
if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode)
x->mb_energy = vp9_block_energy(cpi, x, bsize);
@@ -2194,7 +2106,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
- set_offsets(cpi, tile, mi_row, mi_col, bsize);
+ set_offsets(cpi, tile_info, mi_row, mi_col, bsize);
src_diff_var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src,
mi_row, mi_col, bsize);
}
@@ -2253,8 +2165,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
// PARTITION_NONE
if (partition_none_allowed) {
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rdc, bsize, ctx,
- best_rdc.rdcost);
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col,
+ &this_rdc, bsize, ctx, best_rdc.rdcost);
if (this_rdc.rate != INT_MAX) {
if (bsize >= BLOCK_8X8) {
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -2323,7 +2235,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
}
if (skip) {
if (src_diff_var == UINT_MAX) {
- set_offsets(cpi, tile, mi_row, mi_col, bsize);
+ set_offsets(cpi, tile_info, mi_row, mi_col, bsize);
src_diff_var = get_sby_perpixel_diff_variance(
cpi, &cpi->mb.plane[0].src, mi_row, mi_col, bsize);
}
@@ -2353,7 +2265,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
pc_tree->leaf_split[0]->pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize,
pc_tree->leaf_split[0], best_rdc.rdcost);
if (sum_rdc.rate == INT_MAX)
sum_rdc.rdcost = INT64_MAX;
@@ -2369,7 +2281,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
load_pred_mv(x, ctx);
pc_tree->split[i]->index = i;
- rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
+ rd_pick_partition(cpi, tile_data, tp,
+ mi_row + y_idx, mi_col + x_idx,
subsize, &this_rdc,
best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
@@ -2412,7 +2325,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
pc_tree->horizontal[0].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->horizontal[0], best_rdc.rdcost);
if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
@@ -2427,8 +2340,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
pc_tree->horizontal[1].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row + mi_step, mi_col, &this_rdc,
- subsize, &pc_tree->horizontal[1],
+ rd_pick_sb_modes(cpi, tile_data, mi_row + mi_step, mi_col,
+ &this_rdc, subsize, &pc_tree->horizontal[1],
best_rdc.rdcost - sum_rdc.rdcost);
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -2460,7 +2373,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
pc_tree->vertical[0].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rdc, subsize,
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->vertical[0], best_rdc.rdcost);
if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
bsize > BLOCK_8X8) {
@@ -2474,7 +2387,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
partition_none_allowed)
pc_tree->vertical[1].pred_interp_filter =
ctx->mic.mbmi.interp_filter;
- rd_pick_sb_modes(cpi, tile, mi_row, mi_col + mi_step, &this_rdc, subsize,
+ rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + mi_step,
+ &this_rdc, subsize,
&pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost);
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -2520,7 +2434,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
best_rdc.rate, best_rdc.dist);
- encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
+ encode_sb(cpi, tile_info, tp, mi_row, mi_col, output_enabled,
+ bsize, pc_tree);
}
if (bsize == BLOCK_64X64) {
@@ -2532,9 +2447,12 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
}
}
-static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp) {
+static void encode_rd_sb_row(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
+ int mi_row,
+ TOKENEXTRA **tp) {
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &cpi->mb.e_mbd;
SPEED_FEATURES *const sf = &cpi->sf;
int mi_col;
@@ -2544,7 +2462,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
- for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
+ for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
int dummy_rate;
int64_t dummy_dist;
@@ -2553,10 +2471,6 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO *mi = cm->mi + idx_str;
- MODE_INFO *prev_mi = NULL;
-
- if (cm->frame_type != KEY_FRAME)
- prev_mi = (cm->prev_mip + cm->mi_stride + 1 + idx_str)->src_mi;
if (sf->adaptive_pred_interp_filter) {
for (i = 0; i < 64; ++i)
@@ -2573,56 +2487,34 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
vp9_zero(cpi->mb.pred_mv);
cpi->pc_root->index = 0;
- // TODO(yunqingwang): use_lastframe_partitioning is no longer used in good-
- // quality encoding. Need to evaluate it in real-time encoding later to
- // decide if it can be removed too. And then, do the code cleanup.
cpi->mb.source_variance = UINT_MAX;
if (sf->partition_search_type == FIXED_PARTITION) {
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col,
+ set_offsets(cpi, tile_info, mi_row, mi_col, BLOCK_64X64);
+ set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col,
sf->always_this_block_size);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, cpi->pc_root);
+ rd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col,
+ BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root);
} else if (cpi->partition_search_skippable_frame) {
BLOCK_SIZE bsize;
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+ set_offsets(cpi, tile_info, mi_row, mi_col, BLOCK_64X64);
bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
- set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, cpi->pc_root);
+ set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
+ rd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col,
+ BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root);
} else if (sf->partition_search_type == VAR_BASED_PARTITION &&
cm->frame_type != KEY_FRAME ) {
- choose_partitioning(cpi, tile, mi_row, mi_col);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, cpi->pc_root);
- } else if (sf->partition_search_type == SEARCH_PARTITION &&
- sf->use_lastframe_partitioning &&
- (cpi->rc.frames_since_key %
- sf->last_partitioning_redo_frequency) &&
- cm->prev_mi &&
- cm->show_frame &&
- cm->frame_type != KEY_FRAME &&
- !cpi->rc.is_src_frame_alt_ref &&
- ((sf->use_lastframe_partitioning !=
- LAST_FRAME_PARTITION_LOW_MOTION) ||
- !sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))) {
- if (sf->constrain_copy_partition &&
- sb_has_motion(cm, prev_mi, sf->lf_motion_threshold))
- constrain_copy_partitioning(cpi, tile, mi, prev_mi,
- mi_row, mi_col, BLOCK_16X16);
- else
- copy_partitioning(cm, mi, prev_mi);
- rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, cpi->pc_root);
+ choose_partitioning(cpi, tile_info, mi_row, mi_col);
+ rd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col,
+ BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root);
} else {
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
- set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
- rd_auto_partition_range(cpi, tile, mi_row, mi_col,
+ set_offsets(cpi, tile_info, mi_row, mi_col, BLOCK_64X64);
+ rd_auto_partition_range(cpi, tile_info, mi_row, mi_col,
&sf->min_partition_size,
&sf->max_partition_size);
}
- rd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
+ rd_pick_partition(cpi, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rdc, INT64_MAX, cpi->pc_root);
}
}
@@ -2695,15 +2587,16 @@ static TX_MODE select_tx_mode(const VP9_COMP *cpi) {
return cpi->common.tx_mode;
}
-static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, int mi_col,
- int *rate, int64_t *dist,
+static void nonrd_pick_sb_modes(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
+ int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi;
- set_offsets(cpi, tile, mi_row, mi_col, bsize);
+ set_offsets(cpi, tile_info, mi_row, mi_col, bsize);
mbmi = &xd->mi[0].src_mi->mbmi;
mbmi->sb_type = bsize;
@@ -2712,11 +2605,15 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
- set_mode_info_seg_skip(x, cm->tx_mode, rate, dist, bsize);
+ set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
else
- vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col, rate, dist, bsize, ctx);
+ vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col,
+ rd_cost, bsize, ctx);
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
+
+ if (rd_cost->rate == INT_MAX)
+ vp9_rd_cost_reset(rd_cost);
}
static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
@@ -2776,14 +2673,16 @@ static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
}
}
-static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
+static void nonrd_pick_partition(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
TOKENEXTRA **tp, int mi_row,
- int mi_col, BLOCK_SIZE bsize, int *rate,
- int64_t *dist, int do_recon, int64_t best_rd,
+ int mi_col, BLOCK_SIZE bsize, RD_COST *rd_cost,
+ int do_recon, int64_t best_rd,
PC_TREE *pc_tree) {
const SPEED_FEATURES *const sf = &cpi->sf;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
@@ -2791,9 +2690,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
PICK_MODE_CONTEXT *ctx = &pc_tree->none;
int i;
BLOCK_SIZE subsize = bsize;
- int this_rate, sum_rate = 0, best_rate = INT_MAX;
- int64_t this_dist, sum_dist = 0, best_dist = INT64_MAX;
- int64_t sum_rd = 0;
+ RD_COST this_rdc, sum_rdc, best_rdc;
int do_split = bsize >= BLOCK_8X8;
int do_rect = 1;
// Override skipping rectangular partition operations for edge blocks
@@ -2812,6 +2709,10 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
assert(num_8x8_blocks_wide_lookup[bsize] ==
num_8x8_blocks_high_lookup[bsize]);
+ vp9_rd_cost_init(&sum_rdc);
+ vp9_rd_cost_reset(&best_rdc);
+ best_rdc.rdcost = best_rd;
+
// Determine partition types in search according to the speed features.
// The threshold set here has to be of square block size.
if (sf->auto_min_max_partition_size) {
@@ -2832,17 +2733,19 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
// PARTITION_NONE
if (partition_none_allowed) {
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
- &this_rate, &this_dist, bsize, ctx);
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col,
+ &this_rdc, bsize, ctx);
ctx->mic.mbmi = xd->mi[0].src_mi->mbmi;
ctx->skip_txfm[0] = x->skip_txfm[0];
ctx->skip = x->skip;
+ ctx->pred_pixel_ready = 0;
- if (this_rate != INT_MAX) {
+ if (this_rdc.rate != INT_MAX) {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- this_rate += cpi->partition_cost[pl][PARTITION_NONE];
- sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
- if (sum_rd < best_rd) {
+ this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ this_rdc.rate, this_rdc.dist);
+ if (this_rdc.rdcost < best_rdc.rdcost) {
int64_t dist_breakout_thr = sf->partition_search_breakout_dist_thr;
int64_t rate_breakout_thr = sf->partition_search_breakout_rate_thr;
@@ -2851,15 +2754,13 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
rate_breakout_thr *= num_pels_log2_lookup[bsize];
- best_rate = this_rate;
- best_dist = this_dist;
- best_rd = sum_rd;
+ best_rdc = this_rdc;
if (bsize >= BLOCK_8X8)
pc_tree->partitioning = PARTITION_NONE;
if (!x->e_mbd.lossless &&
- this_rate < rate_breakout_thr &&
- this_dist < dist_breakout_thr) {
+ this_rdc.rate < rate_breakout_thr &&
+ this_rdc.dist < dist_breakout_thr) {
do_split = 0;
do_rect = 0;
}
@@ -2871,35 +2772,34 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
store_pred_mv(x, ctx);
// PARTITION_SPLIT
- sum_rd = 0;
if (do_split) {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
+ sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
subsize = get_subsize(bsize, PARTITION_SPLIT);
- for (i = 0; i < 4 && sum_rd < best_rd; ++i) {
+ for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
const int x_idx = (i & 1) * ms;
const int y_idx = (i >> 1) * ms;
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
load_pred_mv(x, ctx);
- nonrd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx,
- subsize, &this_rate, &this_dist, 0,
- best_rd - sum_rd, pc_tree->split[i]);
+ nonrd_pick_partition(cpi, tile_data, tp,
+ mi_row + y_idx, mi_col + x_idx,
+ subsize, &this_rdc, 0,
+ best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
+ if (this_rdc.rate == INT_MAX) {
+ vp9_rd_cost_reset(&sum_rdc);
} else {
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
}
}
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_SPLIT;
} else {
// skip rectangular partition test when larger block size
@@ -2915,40 +2815,39 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (sf->adaptive_motion_search)
load_pred_mv(x, ctx);
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
- &this_rate, &this_dist, subsize,
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->horizontal[0]);
pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[0].skip = x->skip;
+ pc_tree->horizontal[0].pred_pixel_ready = 0;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
-
- if (sum_rd < best_rd && mi_row + ms < cm->mi_rows) {
+ if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
load_pred_mv(x, ctx);
- nonrd_pick_sb_modes(cpi, tile, mi_row + ms, mi_col,
- &this_rate, &this_dist, subsize,
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row + ms, mi_col,
+ &this_rdc, subsize,
&pc_tree->horizontal[1]);
pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[1].skip = x->skip;
+ pc_tree->horizontal[1].pred_pixel_ready = 0;
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
+ if (this_rdc.rate == INT_MAX) {
+ vp9_rd_cost_reset(&sum_rdc);
} else {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- this_rate += cpi->partition_cost[pl][PARTITION_HORZ];
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ sum_rdc.rate, sum_rdc.dist);
}
}
- if (sum_rd < best_rd) {
- best_rd = sum_rd;
- best_rate = sum_rate;
- best_dist = sum_dist;
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_HORZ;
}
}
@@ -2960,55 +2859,54 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
if (sf->adaptive_motion_search)
load_pred_mv(x, ctx);
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col,
- &this_rate, &this_dist, subsize,
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->vertical[0]);
pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[0].skip = x->skip;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
- if (sum_rd < best_rd && mi_col + ms < cm->mi_cols) {
+ pc_tree->vertical[0].pred_pixel_ready = 0;
+
+ if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
load_pred_mv(x, ctx);
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + ms,
- &this_rate, &this_dist, subsize,
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + ms,
+ &this_rdc, subsize,
&pc_tree->vertical[1]);
pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[1].skip = x->skip;
- if (this_rate == INT_MAX) {
- sum_rd = INT64_MAX;
+ pc_tree->vertical[1].pred_pixel_ready = 0;
+
+ if (this_rdc.rate == INT_MAX) {
+ vp9_rd_cost_reset(&sum_rdc);
} else {
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- this_rate += cpi->partition_cost[pl][PARTITION_VERT];
- sum_rate += this_rate;
- sum_dist += this_dist;
- sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
+ sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ sum_rdc.rate, sum_rdc.dist);
}
}
- if (sum_rd < best_rd) {
- best_rate = sum_rate;
- best_dist = sum_dist;
- best_rd = sum_rd;
+
+ if (sum_rdc.rdcost < best_rdc.rdcost) {
+ best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_VERT;
}
}
- // TODO(JBB): The following line is here just to avoid a static warning
- // that occurs because at this point we never again reuse best_rd
- // despite setting it here. The code should be refactored to avoid this.
- (void) best_rd;
- *rate = best_rate;
- *dist = best_dist;
+ *rd_cost = best_rdc;
- if (best_rate == INT_MAX)
+ if (best_rdc.rate == INT_MAX) {
+ vp9_rd_cost_reset(rd_cost);
return;
+ }
// update mode info array
subsize = get_subsize(bsize, pc_tree->partitioning);
fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, subsize,
pc_tree);
- if (best_rate < INT_MAX && best_dist < INT64_MAX && do_recon) {
+ if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
int output_enabled = (bsize == BLOCK_64X64);
// Check the projected output rate for this SB against it's target
@@ -3016,33 +2914,165 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
// closer to the target.
if ((oxcf->aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) {
vp9_select_in_frame_q_segment(cpi, mi_row, mi_col, output_enabled,
- best_rate);
+ best_rdc.rate);
}
if (oxcf->aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
- best_rate, best_dist);
+ best_rdc.rate, best_rdc.dist);
- encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, bsize, pc_tree);
+ encode_sb_rt(cpi, tile_info, tp, mi_row, mi_col, output_enabled,
+ bsize, pc_tree);
}
if (bsize == BLOCK_64X64) {
assert(tp_orig < *tp);
- assert(best_rate < INT_MAX);
- assert(best_dist < INT64_MAX);
+ assert(best_rdc.rate < INT_MAX);
+ assert(best_rdc.dist < INT64_MAX);
} else {
assert(tp_orig == *tp);
}
}
+// Picks modes for the block at (mi_row, mi_col) by following the partitioning
+// already recorded in the mode-info array, re-searching selected blocks.
+//
+// The partition type is derived from mi[0].src_mi->mbmi.sb_type. Two cases
+// are handed to nonrd_pick_partition() with a narrowed
+// sf.{min,max}_partition_size range instead of being taken as-is:
+//   - a 32x32 block that is partitioned and whose sub-size is >= 16x16;
+//   - a 16x16 block that is partitioned.
+// Every other block follows the recorded partition: nonrd_pick_sb_modes() is
+// run per sub-block and the resulting mbmi / skip_txfm[0] / skip values are
+// copied into the matching PICK_MODE_CONTEXT of pc_tree, which is also marked
+// pred_pixel_ready.
+//
+// cpi            encoder instance; cpi->sf.{min,max}_partition_size may be
+//                overwritten here.
+// tile_data      per-tile encoder data, forwarded to the mode search.
+// mi             mode-info entry for (mi_row, mi_col).
+// tp             token output cursor, forwarded to the encode stage.
+// bsize          size of the block being processed.
+// output_enabled when set and bsize is BLOCK_64X64, the superblock is encoded
+//                with encode_sb_rt() before returning.
+// rd_cost        out: rate and dist accumulated over the sub-blocks. Left
+//                untouched when the block lies outside the frame.
+// pc_tree        partition context tree node for this block.
+static void nonrd_select_partition(VP9_COMP *cpi,
+                                   TileDataEnc *tile_data,
+                                   MODE_INFO *mi,
+                                   TOKENEXTRA **tp,
+                                   int mi_row, int mi_col,
+                                   BLOCK_SIZE bsize, int output_enabled,
+                                   RD_COST *rd_cost, PC_TREE *pc_tree) {
+  VP9_COMMON *const cm = &cpi->common;
+  TileInfo *const tile_info = &tile_data->tile_info;
+  MACROBLOCK *const x = &cpi->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
+  const int mis = cm->mi_stride;
+  PARTITION_TYPE partition;
+  BLOCK_SIZE subsize;
+  RD_COST this_rdc;
+
+  // NOTE(review): this_rdc is reset only here. A recursive call below that
+  // returns early (quadrant outside the frame) does not write to it, so the
+  // previous quadrant's rate/dist would be accumulated a second time.
+  // Confirm whether that is intended.
+  vp9_rd_cost_reset(&this_rdc);
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+    return;
+
+  subsize = (bsize >= BLOCK_8X8) ? mi[0].src_mi->mbmi.sb_type : BLOCK_4X4;
+  partition = partition_lookup[bsl][subsize];
+
+  if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
+      subsize >= BLOCK_16X16) {
+    // Re-search this 32x32 block, allowing sizes from 8x8 up to 32x32.
+    cpi->sf.max_partition_size = BLOCK_32X32;
+    cpi->sf.min_partition_size = BLOCK_8X8;
+    nonrd_pick_partition(cpi, tile_data, tp, mi_row, mi_col, bsize,
+                         rd_cost, 0, INT64_MAX, pc_tree);
+  } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
+    // Re-search this 16x16 block, allowing sizes from 8x8 up to 16x16.
+    cpi->sf.max_partition_size = BLOCK_16X16;
+    cpi->sf.min_partition_size = BLOCK_8X8;
+    nonrd_pick_partition(cpi, tile_data, tp, mi_row, mi_col, bsize,
+                         rd_cost, 0, INT64_MAX, pc_tree);
+  } else {
+    switch (partition) {
+      case PARTITION_NONE:
+        nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
+                            subsize, &pc_tree->none);
+        pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
+        pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
+        pc_tree->none.skip = x->skip;
+        pc_tree->none.pred_pixel_ready = 1;
+        break;
+      case PARTITION_VERT:
+        nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
+                            subsize, &pc_tree->vertical[0]);
+        pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+        pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
+        pc_tree->vertical[0].skip = x->skip;
+        pc_tree->vertical[0].pred_pixel_ready = 1;
+        // The right half is searched only when it starts inside the frame.
+        if (mi_col + hbs < cm->mi_cols) {
+          nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs,
+                              &this_rdc, subsize, &pc_tree->vertical[1]);
+          pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+          pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
+          pc_tree->vertical[1].skip = x->skip;
+          pc_tree->vertical[1].pred_pixel_ready = 1;
+          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+            rd_cost->rate += this_rdc.rate;
+            rd_cost->dist += this_rdc.dist;
+          }
+        }
+        break;
+      case PARTITION_HORZ:
+        nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
+                            subsize, &pc_tree->horizontal[0]);
+        pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
+        pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
+        pc_tree->horizontal[0].skip = x->skip;
+        pc_tree->horizontal[0].pred_pixel_ready = 1;
+        // The bottom half is searched only when it starts inside the frame.
+        if (mi_row + hbs < cm->mi_rows) {
+          // Search into horizontal[1]; passing horizontal[0] here would
+          // overwrite the context just filled in for the top half.
+          nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col,
+                              &this_rdc, subsize, &pc_tree->horizontal[1]);
+          pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
+          pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
+          pc_tree->horizontal[1].skip = x->skip;
+          pc_tree->horizontal[1].pred_pixel_ready = 1;
+          if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+              rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+            rd_cost->rate += this_rdc.rate;
+            rd_cost->dist += this_rdc.dist;
+          }
+        }
+        break;
+      case PARTITION_SPLIT:
+        // Recurse into the four quadrants; the first writes rd_cost directly
+        // and the other three are accumulated into it.
+        subsize = get_subsize(bsize, PARTITION_SPLIT);
+        nonrd_select_partition(cpi, tile_data, mi, tp, mi_row, mi_col,
+                               subsize, output_enabled, rd_cost,
+                               pc_tree->split[0]);
+        nonrd_select_partition(cpi, tile_data, mi + hbs, tp,
+                               mi_row, mi_col + hbs, subsize, output_enabled,
+                               &this_rdc, pc_tree->split[1]);
+        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+          rd_cost->rate += this_rdc.rate;
+          rd_cost->dist += this_rdc.dist;
+        }
+        nonrd_select_partition(cpi, tile_data, mi + hbs * mis, tp,
+                               mi_row + hbs, mi_col, subsize, output_enabled,
+                               &this_rdc, pc_tree->split[2]);
+        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+          rd_cost->rate += this_rdc.rate;
+          rd_cost->dist += this_rdc.dist;
+        }
+        nonrd_select_partition(cpi, tile_data, mi + hbs * mis + hbs, tp,
+                               mi_row + hbs, mi_col + hbs, subsize,
+                               output_enabled, &this_rdc, pc_tree->split[3]);
+        if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+            rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+          rd_cost->rate += this_rdc.rate;
+          rd_cost->dist += this_rdc.dist;
+        }
+        break;
+      default:
+        // A bare string literal is always true; force the failure.
+        assert(0 && "Invalid partition type.");
+        break;
+    }
+  }
+
+  // Only the top-level 64x64 call emits the superblock.
+  if (bsize == BLOCK_64X64 && output_enabled) {
+    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+      vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
+                                              rd_cost->rate, rd_cost->dist);
+    encode_sb_rt(cpi, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree);
+  }
+}
+
+
static void nonrd_use_partition(VP9_COMP *cpi,
- const TileInfo *const tile,
+ TileDataEnc *tile_data,
MODE_INFO *mi,
TOKENEXTRA **tp,
int mi_row, int mi_col,
BLOCK_SIZE bsize, int output_enabled,
- int *totrate, int64_t *totdist,
- PC_TREE *pc_tree) {
+ RD_COST *rd_cost, PC_TREE *pc_tree) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -3050,9 +3080,9 @@ static void nonrd_use_partition(VP9_COMP *cpi,
const int mis = cm->mi_stride;
PARTITION_TYPE partition;
BLOCK_SIZE subsize;
- int rate = INT_MAX;
- int64_t dist = INT64_MAX;
+ RD_COST this_rdc;
+ vp9_rd_cost_reset(&this_rdc);
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
@@ -3061,78 +3091,78 @@ static void nonrd_use_partition(VP9_COMP *cpi,
switch (partition) {
case PARTITION_NONE:
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist,
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
subsize, &pc_tree->none);
pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
pc_tree->none.skip = x->skip;
break;
case PARTITION_VERT:
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist,
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
subsize, &pc_tree->vertical[0]);
pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[0].skip = x->skip;
if (mi_col + hbs < cm->mi_cols) {
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
- &rate, &dist, subsize, &pc_tree->vertical[1]);
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs,
+ &this_rdc, subsize, &pc_tree->vertical[1]);
pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->vertical[1].skip = x->skip;
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
}
break;
case PARTITION_HORZ:
- nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist,
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
subsize, &pc_tree->horizontal[0]);
pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[0].skip = x->skip;
if (mi_row + hbs < cm->mi_rows) {
- nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
- &rate, &dist, subsize, &pc_tree->horizontal[0]);
+ nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col,
+ &this_rdc, subsize, &pc_tree->horizontal[0]);
pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
pc_tree->horizontal[1].skip = x->skip;
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
}
break;
case PARTITION_SPLIT:
subsize = get_subsize(bsize, PARTITION_SPLIT);
- nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
- subsize, output_enabled, totrate, totdist,
+ nonrd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col,
+ subsize, output_enabled, rd_cost,
pc_tree->split[0]);
- nonrd_use_partition(cpi, tile, mi + hbs, tp,
+ nonrd_use_partition(cpi, tile_data, mi + hbs, tp,
mi_row, mi_col + hbs, subsize, output_enabled,
- &rate, &dist, pc_tree->split[1]);
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ &this_rdc, pc_tree->split[1]);
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
- nonrd_use_partition(cpi, tile, mi + hbs * mis, tp,
+ nonrd_use_partition(cpi, tile_data, mi + hbs * mis, tp,
mi_row + hbs, mi_col, subsize, output_enabled,
- &rate, &dist, pc_tree->split[2]);
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ &this_rdc, pc_tree->split[2]);
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
- nonrd_use_partition(cpi, tile, mi + hbs * mis + hbs, tp,
+ nonrd_use_partition(cpi, tile_data, mi + hbs * mis + hbs, tp,
mi_row + hbs, mi_col + hbs, subsize, output_enabled,
- &rate, &dist, pc_tree->split[3]);
- if (rate != INT_MAX && dist != INT64_MAX &&
- *totrate != INT_MAX && *totdist != INT64_MAX) {
- *totrate += rate;
- *totdist += dist;
+ &this_rdc, pc_tree->split[3]);
+ if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
+ rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
+ rd_cost->rate += this_rdc.rate;
+ rd_cost->dist += this_rdc.dist;
}
break;
default:
@@ -3143,15 +3173,19 @@ static void nonrd_use_partition(VP9_COMP *cpi,
if (bsize == BLOCK_64X64 && output_enabled) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_set_rate_and_dist_sb(cpi->cyclic_refresh,
- *totrate, *totdist);
- encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize, pc_tree);
+ rd_cost->rate, rd_cost->dist);
+ encode_sb_rt(cpi, &tile_data->tile_info, tp, mi_row, mi_col,
+ 1, bsize, pc_tree);
}
}
-static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
- int mi_row, TOKENEXTRA **tp) {
+static void encode_nonrd_sb_row(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
+ int mi_row,
+ TOKENEXTRA **tp) {
SPEED_FEATURES *const sf = &cpi->sf;
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &x->e_mbd;
int mi_col;
@@ -3161,53 +3195,55 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
vpx_memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
// Code each SB in the row
- for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
+ for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- int dummy_rate = 0;
- int64_t dummy_dist = 0;
+ RD_COST dummy_rdc;
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO *mi = cm->mi + idx_str;
BLOCK_SIZE bsize;
x->in_static_area = 0;
x->source_variance = UINT_MAX;
vp9_zero(x->pred_mv);
+ vp9_rd_cost_init(&dummy_rdc);
// Set the partition type of the 64X64 block
switch (sf->partition_search_type) {
case VAR_BASED_PARTITION:
- choose_partitioning(cpi, tile, mi_row, mi_col);
- nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- 1, &dummy_rate, &dummy_dist, cpi->pc_root);
+ choose_partitioning(cpi, tile_info, mi_row, mi_col);
+ nonrd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col,
+ BLOCK_64X64, 1, &dummy_rdc, cpi->pc_root);
break;
case SOURCE_VAR_BASED_PARTITION:
- set_source_var_based_partition(cpi, tile, mi, mi_row, mi_col);
- nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- 1, &dummy_rate, &dummy_dist, cpi->pc_root);
+ set_source_var_based_partition(cpi, tile_info, mi, mi_row, mi_col);
+ nonrd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col,
+ BLOCK_64X64, 1, &dummy_rdc, cpi->pc_root);
break;
case FIXED_PARTITION:
bsize = sf->partition_search_type == FIXED_PARTITION ?
sf->always_this_block_size :
get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
- set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
- nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
- 1, &dummy_rate, &dummy_dist, cpi->pc_root);
+ set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
+ nonrd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col,
+ BLOCK_64X64, 1, &dummy_rdc, cpi->pc_root);
break;
case REFERENCE_PARTITION:
- if (sf->partition_check ||
- !(x->in_static_area = is_background(cpi, tile, mi_row, mi_col))) {
- set_modeinfo_offsets(cm, xd, mi_row, mi_col);
- auto_partition_range(cpi, tile, mi_row, mi_col,
+ set_offsets(cpi, tile_info, mi_row, mi_col, BLOCK_64X64);
+ x->in_static_area = is_background(cpi, tile_info, mi_row, mi_col);
+
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
+ xd->mi[0].src_mi->mbmi.segment_id && x->in_static_area) {
+ auto_partition_range(cpi, tile_info, mi_row, mi_col,
&sf->min_partition_size,
&sf->max_partition_size);
- nonrd_pick_partition(cpi, tile, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, INT64_MAX,
- cpi->pc_root);
+ nonrd_pick_partition(cpi, tile_data, tp, mi_row, mi_col,
+ BLOCK_64X64, &dummy_rdc, 1,
+ INT64_MAX, cpi->pc_root);
} else {
- choose_partitioning(cpi, tile, mi_row, mi_col);
- nonrd_use_partition(cpi, tile, mi, tp, mi_row, mi_col,
- BLOCK_64X64, 1, &dummy_rate, &dummy_dist,
- cpi->pc_root);
+ choose_partitioning(cpi, tile_info, mi_row, mi_col);
+ nonrd_select_partition(cpi, tile_data, mi, tp, mi_row, mi_col,
+ BLOCK_64X64, 1, &dummy_rdc, cpi->pc_root);
}
+
break;
default:
assert(0);
@@ -3343,43 +3379,64 @@ static int get_skip_encode_frame(const VP9_COMMON *cm) {
cm->show_frame;
}
+// Puts a freshly allocated TileDataEnc into its starting state: every
+// thresh_freq_fact entry is set to 32 and every mode_map row is the identity
+// mapping (entry j holds j).
+static void tile_data_init(TileDataEnc *tile_data) {
+  int bs;
+  for (bs = 0; bs < BLOCK_SIZES; ++bs) {
+    int mode;
+    for (mode = 0; mode < MAX_MODES; ++mode) {
+      tile_data->mode_map[bs][mode] = mode;
+      tile_data->thresh_freq_fact[bs][mode] = 32;
+    }
+  }
+}
+
static void encode_tiles(VP9_COMP *cpi) {
- const VP9_COMMON *const cm = &cpi->common;
+ VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
int tile_col, tile_row;
- TileInfo tile[4][1 << 6];
TOKENEXTRA *tok[4][1 << 6];
TOKENEXTRA *pre_tok = cpi->tok;
int tile_tok = 0;
+ if (cpi->tile_data == NULL) {
+ CHECK_MEM_ERROR(cm, cpi->tile_data,
+ vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col)
+ tile_data_init(&cpi->tile_data[tile_row * tile_cols + tile_col]);
+ }
+
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- vp9_tile_init(&tile[tile_row][tile_col], cm, tile_row, tile_col);
+ TileInfo *tile_info =
+ &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
+ vp9_tile_init(tile_info, cm, tile_row, tile_col);
tok[tile_row][tile_col] = pre_tok + tile_tok;
pre_tok = tok[tile_row][tile_col];
- tile_tok = allocated_tokens(tile[tile_row][tile_col]);
+ tile_tok = allocated_tokens(*tile_info);
}
}
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- const TileInfo * const ptile = &tile[tile_row][tile_col];
+ const TileInfo * const tile_info =
+ &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
TOKENEXTRA * const old_tok = tok[tile_row][tile_col];
int mi_row;
+ TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
- for (mi_row = ptile->mi_row_start; mi_row < ptile->mi_row_end;
+ for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
if (cpi->sf.use_nonrd_pick_mode && !frame_is_intra_only(cm))
- encode_nonrd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
+ encode_nonrd_sb_row(cpi, this_tile, mi_row, &tok[tile_row][tile_col]);
else
- encode_rd_sb_row(cpi, ptile, mi_row, &tok[tile_row][tile_col]);
+ encode_rd_sb_row(cpi, this_tile, mi_row, &tok[tile_row][tile_col]);
}
cpi->tok_count[tile_row][tile_col] =
(unsigned int)(tok[tile_row][tile_col] - old_tok);
- assert(tok[tile_row][tile_col] - old_tok <= allocated_tokens(*ptile));
+ assert(tok[tile_row][tile_col] - old_tok <= allocated_tokens(*tile_info));
}
}
}
@@ -3737,7 +3794,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
&xd->block_refs[ref]->sf);
}
- if (!cpi->sf.reuse_inter_pred_sby || seg_skip)
+ if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip)
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index f5faa7c23..8ce30789f 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -29,12 +29,6 @@ struct optimize_ctx {
ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
};
-struct encode_b_args {
- MACROBLOCK *x;
- struct optimize_ctx *ctx;
- int8_t *skip;
-};
-
void vp9_subtract_block_c(int rows, int cols,
int16_t *diff, ptrdiff_t diff_stride,
const uint8_t *src, ptrdiff_t src_stride,
@@ -802,7 +796,7 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
}
}
-static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
+void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct encode_b_args* const args = arg;
MACROBLOCK *const x = args->x;
@@ -1040,18 +1034,10 @@ static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
*(args->skip) = 0;
}
-void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- int8_t *skip) {
- struct encode_b_args arg = {x, NULL, skip};
- encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
-}
-
-
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
const MACROBLOCKD *const xd = &x->e_mbd;
struct encode_b_args arg = {x, NULL, &xd->mi[0].src_mi->mbmi.skip};
- vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra,
- &arg);
+ vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
+ vp9_encode_block_intra, &arg);
}
diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h
index 54d2b3751..97df8a66b 100644
--- a/vp9/encoder/vp9_encodemb.h
+++ b/vp9/encoder/vp9_encodemb.h
@@ -18,6 +18,11 @@
extern "C" {
#endif
+struct encode_b_args {
+ MACROBLOCK *x;
+ struct optimize_ctx *ctx;
+ int8_t *skip;
+};
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize);
void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block,
@@ -29,9 +34,8 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
-void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block,
- BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
- int8_t *skip);
+void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, void *arg);
void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c
index 089839567..f36d76e3d 100644
--- a/vp9/encoder/vp9_encodemv.c
+++ b/vp9/encoder/vp9_encodemv.c
@@ -163,7 +163,7 @@ static void write_mv_update(const vp9_tree_index *tree,
void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) {
int i, j;
- nmv_context *const mvc = &cm->fc.nmvc;
+ nmv_context *const mvc = &cm->fc->nmvc;
nmv_context_counts *const counts = &cm->counts.mv;
write_mv_update(vp9_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w);
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index c5e872607..dfc636a41 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -134,7 +134,7 @@ static void setup_frame(VP9_COMP *cpi) {
cpi->refresh_alt_ref_frame = 1;
vp9_zero(cpi->interp_filter_selected);
} else {
- cm->fc = cm->frame_contexts[cm->frame_context_idx];
+ *cm->fc = cm->frame_contexts[cm->frame_context_idx];
vp9_zero(cpi->interp_filter_selected[0]);
}
}
@@ -160,6 +160,13 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
int i;
+ vpx_free(cm->fc);
+ cm->fc = NULL;
+ vpx_free(cm->frame_contexts);
+ cm->frame_contexts = NULL;
+ vpx_free(cpi->tile_data);
+ cpi->tile_data = NULL;
+
// Delete sementation map
vpx_free(cpi->segmentation_map);
cpi->segmentation_map = NULL;
@@ -257,7 +264,7 @@ static void save_coding_context(VP9_COMP *cpi) {
vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas);
vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas);
- cc->fc = cm->fc;
+ cc->fc = *cm->fc;
}
static void restore_coding_context(VP9_COMP *cpi) {
@@ -286,7 +293,7 @@ static void restore_coding_context(VP9_COMP *cpi) {
vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas);
vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas);
- cm->fc = cc->fc;
+ *cm->fc = cc->fc;
}
static void configure_static_seg_features(VP9_COMP *cpi) {
@@ -1374,6 +1381,12 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
cm->error.setjmp = 1;
+ CHECK_MEM_ERROR(cm, cm->fc,
+ (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
+ CHECK_MEM_ERROR(cm, cm->frame_contexts,
+ (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS,
+ sizeof(*cm->frame_contexts)));
+
cpi->use_svc = 0;
init_config(cpi, oxcf);
@@ -1381,6 +1394,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
cm->current_video_frame = 0;
cpi->partition_search_skippable_frame = 0;
+ cpi->tile_data = NULL;
// Create the encoder segmentation map and set all entries to 0
CHECK_MEM_ERROR(cm, cpi->segmentation_map,
@@ -1435,6 +1449,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
#endif
cpi->refresh_alt_ref_frame = 0;
+ cpi->multi_arf_last_grp_enabled = 0;
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
@@ -2397,30 +2412,37 @@ void vp9_scale_references(VP9_COMP *cpi) {
const VP9_REFFRAME ref_mask[3] = {VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG};
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
-
// Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1).
- if ((cpi->ref_frame_flags & ref_mask[ref_frame - 1]) &&
- (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height)) {
- const int new_fb = get_free_fb(cm);
- vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif // CONFIG_VP9_HIGHBITDEPTH
- VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+ if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) {
+ const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
+ const YV12_BUFFER_CONFIG *const ref = &cm->frame_bufs[idx].buf;
+
#if CONFIG_VP9_HIGHBITDEPTH
- scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf,
- (int)cm->bit_depth);
+ if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
+ const int new_fb = get_free_fb(cm);
+ vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ cm->use_highbitdepth,
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+ scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf,
+ (int)cm->bit_depth);
#else
- scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
+ if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
+ const int new_fb = get_free_fb(cm);
+ vp9_realloc_frame_buffer(&cm->frame_bufs[new_fb].buf,
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+ scale_and_extend_frame(ref, &cm->frame_bufs[new_fb].buf);
#endif // CONFIG_VP9_HIGHBITDEPTH
- cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
+ cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
+ } else {
+ cpi->scaled_ref_idx[ref_frame - 1] = idx;
+ ++cm->frame_bufs[idx].ref_count;
+ }
} else {
- cpi->scaled_ref_idx[ref_frame - 1] = idx;
- cm->frame_bufs[idx].ref_count++;
+ cpi->scaled_ref_idx[ref_frame - 1] = INVALID_REF_BUFFER_IDX;
}
}
}
@@ -2428,9 +2450,13 @@ void vp9_scale_references(VP9_COMP *cpi) {
static void release_scaled_references(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
int i;
-
- for (i = 0; i < 3; i++)
- cm->frame_bufs[cpi->scaled_ref_idx[i]].ref_count--;
+ for (i = 0; i < MAX_REF_FRAMES; ++i) {
+ const int idx = cpi->scaled_ref_idx[i];
+ RefCntBuffer *const buf =
+ idx != INVALID_REF_BUFFER_IDX ? &cm->frame_bufs[idx] : NULL;
+ if (buf != NULL)
+ --buf->ref_count;
+ }
}
static void full_to_model_count(unsigned int *model_count,
@@ -2515,10 +2541,181 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
}
#endif
-static void encode_without_recode_loop(VP9_COMP *cpi,
- int q) {
+static void set_mv_search_params(VP9_COMP *cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const unsigned int max_mv_def = MIN(cm->width, cm->height);
+
+ // Default based on max resolution.
+ cpi->mv_step_param = vp9_init_search_range(max_mv_def);
+
+ if (cpi->sf.mv.auto_mv_step_size) {
+ if (frame_is_intra_only(cm)) {
+ // Initialize max_mv_magnitude for use in the first INTER frame
+ // after a key/intra-only frame.
+ cpi->max_mv_magnitude = max_mv_def;
+ } else {
+ if (cm->show_frame) {
+ // Allow mv_steps to correspond to twice the max mv magnitude found
+ // in the previous frame, capped by the default max_mv_magnitude based
+ // on resolution.
+ cpi->mv_step_param =
+ vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
+ }
+ cpi->max_mv_magnitude = 0;
+ }
+ }
+}
+
+static void set_size_dependent_vars(VP9_COMP *cpi, int *q,
+ int *bottom_index, int *top_index) {
+ VP9_COMMON *const cm = &cpi->common;
+ const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+
+ // Setup variables that depend on the dimensions of the frame.
+ set_mv_search_params(cpi);
+
+ // Configure experimental use of segmentation for enhanced coding of
+ // static regions if indicated.
+ // Only allowed in the second pass of a two pass encode, as it requires
+ // lagged coding, and if the relevant speed feature flag is set.
+ if (oxcf->pass == 2 && cpi->sf.static_segmentation)
+ configure_static_seg_features(cpi);
+
+#if CONFIG_VP9_POSTPROC
+ if (oxcf->noise_sensitivity > 0) {
+ int l = 0;
+ switch (oxcf->noise_sensitivity) {
+ case 1:
+ l = 20;
+ break;
+ case 2:
+ l = 40;
+ break;
+ case 3:
+ l = 60;
+ break;
+ case 4:
+ case 5:
+ l = 100;
+ break;
+ case 6:
+ l = 150;
+ break;
+ }
+ vp9_denoise(cpi->Source, cpi->Source, l);
+ }
+#endif // CONFIG_VP9_POSTPROC
+
+ vp9_set_speed_features(cpi);
+
+ vp9_set_rd_speed_thresholds(cpi);
+ vp9_set_rd_speed_thresholds_sub8x8(cpi);
+
+ // Decide q and q bounds.
+ *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
+
+ if (!frame_is_intra_only(cm)) {
+ cm->interp_filter = cpi->sf.default_interp_filter;
+ vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
+ }
+}
+
+static void init_motion_estimation(VP9_COMP *cpi) {
+ int y_stride = cpi->scaled_source.y_stride;
+
+ if (cpi->sf.mv.search_method == NSTEP) {
+ vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
+ } else if (cpi->sf.mv.search_method == DIAMOND) {
+ vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
+ }
+}
+
+extern void vbr_rate_correction(VP9_COMP *cpi,
+ int * this_frame_target,
+ const int64_t vbr_bits_off_target);
+
+void set_frame_size(VP9_COMP *cpi) {
+ int ref_frame;
+ VP9_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+
+ if ((oxcf->pass == 2) &&
+ (!cpi->use_svc ||
+ (is_two_pass_svc(cpi) &&
+ cpi->svc.encode_empty_frame_state != ENCODING))) {
+ int target_rate = rc->base_frame_target;
+ if (oxcf->rc_mode == VPX_VBR)
+ vbr_rate_correction(cpi, &target_rate, rc->vbr_bits_off_target);
+ vp9_rc_set_frame_target(cpi, target_rate);
+ }
+
+ if (oxcf->pass == 2 &&
+ cm->current_video_frame == 0 &&
+ oxcf->allow_spatial_resampling &&
+ oxcf->rc_mode == VPX_VBR) {
+ // Internal scaling is triggered on the first frame.
+ vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
+ oxcf->scaled_frame_height);
+ }
+
+ // Reset the frame pointers to the current frame size.
+ vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
+ cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+
+ alloc_util_frame_buffers(cpi);
+ init_motion_estimation(cpi);
+
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
+ YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf;
+ RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
+ ref_buf->buf = buf;
+ ref_buf->idx = idx;
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp9_setup_scale_factors_for_frame(&ref_buf->sf,
+ buf->y_crop_width, buf->y_crop_height,
+ cm->width, cm->height,
+ (buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
+ 1 : 0);
+#else
+ vp9_setup_scale_factors_for_frame(&ref_buf->sf,
+ buf->y_crop_width, buf->y_crop_height,
+ cm->width, cm->height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ if (vp9_is_scaled(&ref_buf->sf))
+ vp9_extend_frame_borders(buf);
+ }
+
+ set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
+}
+
+static void encode_without_recode_loop(VP9_COMP *cpi) {
+ int q;
+ int bottom_index, top_index; // Dummy.
VP9_COMMON *const cm = &cpi->common;
+
vp9_clear_system_state();
+
+ set_frame_size(cpi);
+
+ cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
+ &cpi->scaled_source);
+
+ if (cpi->unscaled_last_source != NULL)
+ cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
+ &cpi->scaled_last_source);
+
+ vp9_scale_references(cpi);
+
+ set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+
vp9_set_quantizer(cm, q);
setup_frame(cpi);
// Variance adaptive and in frame q adjustment experiments are mutually
@@ -2541,28 +2738,45 @@ static void encode_without_recode_loop(VP9_COMP *cpi,
static void encode_with_recode_loop(VP9_COMP *cpi,
size_t *size,
- uint8_t *dest,
- int q,
- int bottom_index,
- int top_index) {
+ uint8_t *dest) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ int q;
+ int q_low, q_high;
+ int bottom_index, top_index;
int loop_count = 0;
int loop = 0;
int overshoot_seen = 0;
int undershoot_seen = 0;
- int q_low = bottom_index, q_high = top_index;
int frame_over_shoot_limit;
int frame_under_shoot_limit;
- // Decide frame size bounds
- vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
- &frame_under_shoot_limit,
- &frame_over_shoot_limit);
-
do {
vp9_clear_system_state();
+ if (loop_count == 0) {
+ set_frame_size(cpi);
+
+ // Decide frame size bounds
+ vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
+ &frame_under_shoot_limit,
+ &frame_over_shoot_limit);
+
+ cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
+ &cpi->scaled_source);
+
+ if (cpi->unscaled_last_source != NULL)
+ cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
+ &cpi->scaled_last_source);
+
+ vp9_scale_references(cpi);
+
+ set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+
+ q_low = bottom_index;
+ q_high = top_index;
+ }
+
vp9_set_quantizer(cm, q);
if (loop_count == 0)
@@ -2804,25 +3018,6 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
}
}
-static int is_skippable_frame(const VP9_COMP *cpi) {
- // If the current frame does not have non-zero motion vector detected in the
- // first pass, and so do its previous and forward frames, then this frame
- // can be skipped for partition check, and the partition size is assigned
- // according to the variance
- const SVC *const svc = &cpi->svc;
- const TWO_PASS *const twopass = is_two_pass_svc(cpi) ?
- &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;
-
- return (!frame_is_intra_only(&cpi->common) &&
- twopass->stats_in - 2 > twopass->stats_in_start &&
- twopass->stats_in < twopass->stats_in_end &&
- (twopass->stats_in - 1)->pcnt_inter - (twopass->stats_in - 1)->pcnt_motion
- == 1 &&
- (twopass->stats_in - 2)->pcnt_inter - (twopass->stats_in - 2)->pcnt_motion
- == 1 &&
- twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1);
-}
-
static void set_arf_sign_bias(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
int arf_sign_bias;
@@ -2839,31 +3034,6 @@ static void set_arf_sign_bias(VP9_COMP *cpi) {
cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
}
-static void set_mv_search_params(VP9_COMP *cpi) {
- const VP9_COMMON *const cm = &cpi->common;
- const unsigned int max_mv_def = MIN(cm->width, cm->height);
-
- // Default based on max resolution.
- cpi->mv_step_param = vp9_init_search_range(max_mv_def);
-
- if (cpi->sf.mv.auto_mv_step_size) {
- if (frame_is_intra_only(cm)) {
- // Initialize max_mv_magnitude for use in the first INTER frame
- // after a key/intra-only frame.
- cpi->max_mv_magnitude = max_mv_def;
- } else {
- if (cm->show_frame)
- // Allow mv_steps to correspond to twice the max mv magnitude found
- // in the previous frame, capped by the default max_mv_magnitude based
- // on resolution.
- cpi->mv_step_param =
- vp9_init_search_range(MIN(max_mv_def, 2 * cpi->max_mv_magnitude));
- cpi->max_mv_magnitude = 0;
- }
- }
-}
-
-
int setup_interp_filter_search_mask(VP9_COMP *cpi) {
INTERP_FILTER ifilter;
int ref_total[MAX_REF_FRAMES] = {0};
@@ -2898,21 +3068,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
struct segmentation *const seg = &cm->seg;
TX_SIZE t;
- int q;
- int top_index;
- int bottom_index;
set_ext_overrides(cpi);
- cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
- &cpi->scaled_source);
-
- if (cpi->unscaled_last_source != NULL)
- cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source,
- &cpi->scaled_last_source);
-
- vp9_scale_references(cpi);
-
vp9_clear_system_state();
// Enable or disable mode based tweaking of the zbin.
@@ -2927,14 +3085,11 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// Set default state for segment based loop filter update flags.
cm->lf.mode_ref_delta_update = 0;
- set_mv_search_params(cpi);
-
if (cpi->oxcf.pass == 2 &&
cpi->sf.adaptive_interp_filter_search)
cpi->sf.interp_filter_search_mask =
setup_interp_filter_search_mask(cpi);
-
// Set various flags etc to special state if it is a key frame.
if (frame_is_intra_only(cm)) {
// Reset the loop filter deltas and segmentation map.
@@ -2994,20 +3149,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
}
- // Configure experimental use of segmentation for enhanced coding of
- // static regions if indicated.
- // Only allowed in second pass of two pass (as requires lagged coding)
- // and if the relevant speed feature flag is set.
- if (oxcf->pass == 2 && cpi->sf.static_segmentation)
- configure_static_seg_features(cpi);
-
- // Check if the current frame is skippable for the partition search in the
- // second pass according to the first pass stats
- if (cpi->sf.allow_partition_search_skip && oxcf->pass == 2 &&
- (!cpi->use_svc || is_two_pass_svc(cpi))) {
- cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
- }
-
// For 1 pass CBR, check if we are dropping this frame.
// Never drop on key frame.
if (oxcf->pass == 0 &&
@@ -3022,31 +3163,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
vp9_clear_system_state();
-#if CONFIG_VP9_POSTPROC
- if (oxcf->noise_sensitivity > 0) {
- int l = 0;
- switch (oxcf->noise_sensitivity) {
- case 1:
- l = 20;
- break;
- case 2:
- l = 40;
- break;
- case 3:
- l = 60;
- break;
- case 4:
- case 5:
- l = 100;
- break;
- case 6:
- l = 150;
- break;
- }
- vp9_denoise(cpi->Source, cpi->Source, l);
- }
-#endif
-
#if CONFIG_INTERNAL_STATS
{
int i;
@@ -3055,24 +3171,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
#endif
- vp9_set_speed_features(cpi);
-
- vp9_set_rd_speed_thresholds(cpi);
- vp9_set_rd_speed_thresholds_sub8x8(cpi);
-
- // Decide q and q bounds.
- q = vp9_rc_pick_q_and_bounds(cpi, &bottom_index, &top_index);
-
- if (!frame_is_intra_only(cm)) {
- cm->interp_filter = cpi->sf.default_interp_filter;
- /* TODO: Decide this more intelligently */
- vp9_set_high_precision_mv(cpi, q < HIGH_PRECISION_MV_QTHRESH);
- }
-
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
- encode_without_recode_loop(cpi, q);
+ encode_without_recode_loop(cpi);
} else {
- encode_with_recode_loop(cpi, size, dest, q, bottom_index, top_index);
+ encode_with_recode_loop(cpi, size, dest);
}
#if CONFIG_VP9_TEMPORAL_DENOISING
@@ -3215,16 +3317,6 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size,
vp9_twopass_postencode_update(cpi);
}
-static void init_motion_estimation(VP9_COMP *cpi) {
- int y_stride = cpi->scaled_source.y_stride;
-
- if (cpi->sf.mv.search_method == NSTEP) {
- vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride);
- } else if (cpi->sf.mv.search_method == DIAMOND) {
- vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride);
- }
-}
-
static void check_initial_width(VP9_COMP *cpi,
#if CONFIG_VP9_HIGHBITDEPTH
int use_highbitdepth,
@@ -3243,10 +3335,11 @@ static void check_initial_width(VP9_COMP *cpi,
alloc_ref_frame_buffers(cpi);
alloc_util_frame_buffers(cpi);
- init_motion_estimation(cpi);
+ init_motion_estimation(cpi); // TODO(agrange) This can be removed.
cpi->initial_width = cm->width;
cpi->initial_height = cm->height;
+ cpi->initial_mbs = cm->MBs;
}
}
@@ -3388,14 +3481,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
int64_t *time_stamp, int64_t *time_end, int flush) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->mb.e_mbd;
RATE_CONTROL *const rc = &cpi->rc;
struct vpx_usec_timer cmptimer;
YV12_BUFFER_CONFIG *force_src_buffer = NULL;
struct lookahead_entry *last_source = NULL;
struct lookahead_entry *source = NULL;
- MV_REFERENCE_FRAME ref_frame;
int arf_src_index;
+ int i;
if (is_two_pass_svc(cpi)) {
#if CONFIG_SPATIAL_SVC
@@ -3416,11 +3508,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// Note that at the moment multi_arf is only configured for 2 pass VBR and
// will not work properly with svc.
if ((oxcf->pass == 2) && !cpi->use_svc &&
- (cpi->oxcf.enable_auto_arf > 1) && (cpi->oxcf.rc_mode == VPX_VBR))
+ (cpi->oxcf.enable_auto_arf > 1))
cpi->multi_arf_allowed = 1;
else
cpi->multi_arf_allowed = 0;
- cpi->multi_arf_last_grp_enabled = 0;
// Normal defaults
cm->reset_frame_context = 0;
@@ -3535,24 +3626,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
vp9_restore_layer_context(cpi);
}
- // start with a 0 size frame
- *size = 0;
-
- /* find a free buffer for the new frame, releasing the reference previously
- * held.
- */
+ // Find a free buffer for the new frame, releasing the reference previously
+ // held.
cm->frame_bufs[cm->new_fb_idx].ref_count--;
cm->new_fb_idx = get_free_fb(cm);
- // For two pass encodes analyse the first pass stats and determine
- // the bit allocation and other parameters for this frame / group of frames.
- if ((oxcf->pass == 2) &&
- (!cpi->use_svc ||
- (is_two_pass_svc(cpi) &&
- cpi->svc.encode_empty_frame_state != ENCODING))) {
- vp9_rc_get_second_pass_params(cpi);
- }
-
if (!cpi->use_svc && cpi->multi_arf_allowed) {
if (cm->frame_type == KEY_FRAME) {
init_buffer_indices(cpi);
@@ -3562,56 +3640,27 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
}
- cpi->frame_flags = *frame_flags;
-
- if (oxcf->pass == 2 &&
- cm->current_video_frame == 0 &&
- oxcf->allow_spatial_resampling &&
- oxcf->rc_mode == VPX_VBR) {
- // Internal scaling is triggered on the first frame.
- vp9_set_size_literal(cpi, oxcf->scaled_frame_width,
- oxcf->scaled_frame_height);
- }
-
- // Reset the frame pointers to the current frame size
- vp9_realloc_frame_buffer(get_frame_new_buffer(cm),
- cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VP9_ENC_BORDER_IN_PIXELS, NULL, NULL, NULL);
+ // Start with a 0 size frame.
+ *size = 0;
- alloc_util_frame_buffers(cpi);
- init_motion_estimation(cpi);
+ cpi->frame_flags = *frame_flags;
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
- YV12_BUFFER_CONFIG *const buf = &cm->frame_bufs[idx].buf;
- RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1];
- ref_buf->buf = buf;
- ref_buf->idx = idx;
-#if CONFIG_VP9_HIGHBITDEPTH
- vp9_setup_scale_factors_for_frame(&ref_buf->sf,
- buf->y_crop_width, buf->y_crop_height,
- cm->width, cm->height,
- (buf->flags & YV12_FLAG_HIGHBITDEPTH) ?
- 1 : 0);
-#else
- vp9_setup_scale_factors_for_frame(&ref_buf->sf,
- buf->y_crop_width, buf->y_crop_height,
- cm->width, cm->height);
-#endif // CONFIG_VP9_HIGHBITDEPTH
- if (vp9_is_scaled(&ref_buf->sf))
- vp9_extend_frame_borders(buf);
+ if ((oxcf->pass == 2) &&
+ (!cpi->use_svc ||
+ (is_two_pass_svc(cpi) &&
+ cpi->svc.encode_empty_frame_state != ENCODING))) {
+ vp9_rc_get_second_pass_params(cpi);
+ } else {
+ set_frame_size(cpi);
}
- set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
-
if (oxcf->aq_mode == VARIANCE_AQ) {
vp9_vaq_init();
}
+ for (i = 0; i < MAX_REF_FRAMES; ++i)
+ cpi->scaled_ref_idx[i] = INVALID_REF_BUFFER_IDX;
+
if (oxcf->pass == 1 &&
(!cpi->use_svc || is_two_pass_svc(cpi))) {
const int lossless = is_lossless_requested(oxcf);
@@ -3638,9 +3687,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
if (cm->refresh_frame_context)
- cm->frame_contexts[cm->frame_context_idx] = cm->fc;
+ cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
- // Frame was dropped, release scaled references.
+ // No frame encoded, or frame was dropped, release scaled references.
if (*size == 0) {
release_scaled_references(cpi);
}
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index 1e6047464..0e112f2ff 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -44,6 +44,7 @@ extern "C" {
#endif
#define DEFAULT_GF_INTERVAL 10
+#define INVALID_REF_BUFFER_IDX -1 // Marks an invalid reference buffer id.
typedef struct {
int nmvjointcost[MV_JOINTS];
@@ -122,7 +123,12 @@ typedef struct VP9EncoderConfig {
int noise_sensitivity; // pre processing blur: recommendation 0
int sharpness; // sharpening output: recommendation 0:
int speed;
+ // maximum allowed bitrate for any intra frame in % of bitrate target.
unsigned int rc_max_intra_bitrate_pct;
+ // maximum allowed bitrate for any inter frame in % of bitrate target.
+ unsigned int rc_max_inter_bitrate_pct;
+ // percent of rate boost for golden frame in CBR mode.
+ unsigned int gf_cbr_boost_pct;
MODE mode;
int pass;
@@ -223,6 +229,13 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
}
+// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
+typedef struct TileDataEnc {
+ TileInfo tile_info;
+ int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
+ int mode_map[BLOCK_SIZES][MAX_MODES];
+} TileDataEnc;
+
typedef struct VP9_COMP {
QUANTS quants;
MACROBLOCK mb;
@@ -238,10 +251,12 @@ typedef struct VP9_COMP {
YV12_BUFFER_CONFIG *unscaled_last_source;
YV12_BUFFER_CONFIG scaled_last_source;
+ TileDataEnc *tile_data;
+
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
- int scaled_ref_idx[3];
+ int scaled_ref_idx[MAX_REF_FRAMES];
int lst_fb_idx;
int gld_fb_idx;
int alt_fb_idx;
@@ -374,6 +389,10 @@ typedef struct VP9_COMP {
int initial_width;
int initial_height;
+ int initial_mbs; // Number of MBs in the full-size frame; to be used to
+ // normalize the firstpass stats. This will differ from the
+ // number of MBs in the current frame when the frame is
+ // scaled.
int use_svc;
diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c
index f1baf8323..c8c784b73 100644
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -138,7 +138,7 @@ static void output_fpmb_stats(uint8_t *this_frame_mb_stats, VP9_COMMON *cm,
struct vpx_codec_cx_pkt pkt;
pkt.kind = VPX_CODEC_FPMB_STATS_PKT;
pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats;
- pkt.data.firstpass_mb_stats.sz = cm->MBs * sizeof(uint8_t);
+ pkt.data.firstpass_mb_stats.sz = cm->initial_mbs * sizeof(uint8_t);
vpx_codec_pkt_list_add(pktlist, &pkt);
}
#endif
@@ -483,7 +483,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
- vp9_zero_array(cpi->twopass.frame_mb_stats_buf, cm->MBs);
+ vp9_zero_array(cpi->twopass.frame_mb_stats_buf, cm->initial_mbs);
}
#endif
@@ -934,12 +934,14 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
vp9_clear_system_state();
{
FIRSTPASS_STATS fps;
- // The minimum error here insures some bit alocation to frames even
+ // The minimum error here insures some bit allocation to frames even
// in static regions. The allocation per MB declines for larger formats
// where the typical "real" energy per MB also falls.
// Initial estimate here uses sqrt(mbs) to define the min_err, where the
- // number of mbs is propotional to image area.
- const double min_err = 200 * sqrt(cm->MBs);
+ // number of mbs is proportional to the image area.
+ const int num_mbs =
+ cpi->oxcf.allow_spatial_resampling ? cpi->initial_mbs : cpi->common.MBs;
+ const double min_err = 200 * sqrt(num_mbs);
fps.frame = cm->current_video_frame;
fps.spatial_layer_id = cpi->svc.spatial_layer_id;
@@ -947,9 +949,9 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
fps.sr_coded_error = (double)(sr_coded_error >> 8) + min_err;
fps.intra_error = (double)(intra_error >> 8) + min_err;
fps.count = 1.0;
- fps.pcnt_inter = (double)intercount / cm->MBs;
- fps.pcnt_second_ref = (double)second_ref_count / cm->MBs;
- fps.pcnt_neutral = (double)neutral_count / cm->MBs;
+ fps.pcnt_inter = (double)intercount / num_mbs;
+ fps.pcnt_second_ref = (double)second_ref_count / num_mbs;
+ fps.pcnt_neutral = (double)neutral_count / num_mbs;
if (mvcount > 0) {
fps.MVr = (double)sum_mvr / mvcount;
@@ -960,7 +962,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / mvcount)) / mvcount;
fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2);
fps.new_mv_count = new_mv_count;
- fps.pcnt_motion = (double)mvcount / cm->MBs;
+ fps.pcnt_motion = (double)mvcount / num_mbs;
} else {
fps.MVr = 0.0;
fps.mvr_abs = 0.0;
@@ -1074,7 +1076,8 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi,
if (section_target_bandwidth <= 0) {
return rc->worst_quality; // Highest value allowed
} else {
- const int num_mbs = cpi->common.MBs;
+ const int num_mbs =
+ cpi->oxcf.allow_spatial_resampling ? cpi->initial_mbs : cpi->common.MBs;
const double section_err = stats->coded_error / stats->count;
const double err_per_mb = section_err / num_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
@@ -1188,9 +1191,12 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
#define LOW_SR_DIFF_TRHESH 0.1
#define SR_DIFF_MAX 128.0
-static double get_sr_decay_rate(const VP9_COMMON *cm,
+static double get_sr_decay_rate(const VP9_COMP *cpi,
const FIRSTPASS_STATS *frame) {
- double sr_diff = (frame->sr_coded_error - frame->coded_error) / cm->MBs;
+ const int num_mbs =
+ cpi->oxcf.allow_spatial_resampling ? cpi->initial_mbs : cpi->common.MBs;
+ double sr_diff =
+ (frame->sr_coded_error - frame->coded_error) / num_mbs;
double sr_decay = 1.0;
const double motion_amplitude_factor =
frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2);
@@ -1207,19 +1213,19 @@ static double get_sr_decay_rate(const VP9_COMMON *cm,
// This function gives an estimate of how badly we believe the prediction
// quality is decaying from frame to frame.
-static double get_zero_motion_factor(const VP9_COMMON *cm,
+static double get_zero_motion_factor(const VP9_COMP *cpi,
const FIRSTPASS_STATS *frame) {
const double zero_motion_pct = frame->pcnt_inter -
frame->pcnt_motion;
- double sr_decay = get_sr_decay_rate(cm, frame);
+ double sr_decay = get_sr_decay_rate(cpi, frame);
return MIN(sr_decay, zero_motion_pct);
}
#define ZM_POWER_FACTOR 0.75
-static double get_prediction_decay_rate(const VP9_COMMON *cm,
+static double get_prediction_decay_rate(const VP9_COMP *cpi,
const FIRSTPASS_STATS *next_frame) {
- const double sr_decay_rate = get_sr_decay_rate(cm, next_frame);
+ const double sr_decay_rate = get_sr_decay_rate(cpi, next_frame);
const double zero_motion_factor =
(0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion),
ZM_POWER_FACTOR));
@@ -1314,9 +1320,11 @@ static double calc_frame_boost(VP9_COMP *cpi,
vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME],
cpi->common.bit_depth);
const double boost_correction = MIN((0.5 + (lq * 0.015)), 1.5);
+ const int num_mbs =
+ cpi->oxcf.allow_spatial_resampling ? cpi->initial_mbs : cpi->common.MBs;
// Underlying boost factor is based on inter error ratio.
- frame_boost = (BASELINE_ERR_PER_MB * cpi->common.MBs) /
+ frame_boost = (BASELINE_ERR_PER_MB * num_mbs) /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
frame_boost = frame_boost * BOOST_FACTOR * boost_correction;
@@ -1365,7 +1373,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(&cpi->common, this_frame);
+ decay_accumulator *= get_prediction_decay_rate(cpi, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
}
@@ -1404,7 +1412,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset,
// Cumulative effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(&cpi->common, this_frame);
+ decay_accumulator *= get_prediction_decay_rate(cpi, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR : decay_accumulator;
}
@@ -1723,7 +1731,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
gf_group_err -= gf_first_frame_err;
// Motion breakout threshold for loop below depends on image size.
- mv_ratio_accumulator_thresh = (cpi->common.width + cpi->common.height) / 4.0;
+ mv_ratio_accumulator_thresh =
+ (cpi->common.height + cpi->common.width) / 4.0;
// Set a maximum and minimum interval for the GF group.
// If the image appears almost completely static we can extend beyond this.
@@ -1775,14 +1784,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
last_loop_decay_rate = loop_decay_rate;
- loop_decay_rate = get_prediction_decay_rate(&cpi->common, &next_frame);
+ loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
decay_accumulator = decay_accumulator * loop_decay_rate;
// Monitor for static sections.
zero_motion_accumulator =
MIN(zero_motion_accumulator,
- get_zero_motion_factor(&cpi->common, &next_frame));
+ get_zero_motion_factor(cpi, &next_frame));
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
@@ -2048,8 +2057,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
break;
// How fast is the prediction quality decaying?
- loop_decay_rate = get_prediction_decay_rate(&cpi->common,
- twopass->stats_in);
+ loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in);
// We want to know something about the recent past... rather than
// as used elsewhere where we are concerned with decay in prediction
@@ -2160,7 +2168,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Monitor for static sections.
zero_motion_accumulator =
MIN(zero_motion_accumulator,
- get_zero_motion_factor(&cpi->common, &next_frame));
+ get_zero_motion_factor(cpi, &next_frame));
// Not all frames in the group are necessarily used in calculating boost.
if ((i <= rc->max_gf_interval) ||
@@ -2171,7 +2179,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// How fast is prediction quality decaying.
if (!detect_flash(twopass, 0)) {
const double loop_decay_rate =
- get_prediction_decay_rate(&cpi->common, &next_frame);
+ get_prediction_decay_rate(cpi, &next_frame);
decay_accumulator *= loop_decay_rate;
decay_accumulator = MAX(decay_accumulator, MIN_DECAY_FACTOR);
av_decay_accumulator += decay_accumulator;
@@ -2295,6 +2303,24 @@ void configure_buffer_updates(VP9_COMP *cpi) {
}
}
+int is_skippable_frame(const VP9_COMP *cpi) {
+ // If the current frame does not have non-zero motion vector detected in the
+ // first pass, and so do its previous and forward frames, then this frame
+ // can be skipped for partition check, and the partition size is assigned
+ // according to the variance
+ const SVC *const svc = &cpi->svc;
+ const TWO_PASS *const twopass = is_two_pass_svc(cpi) ?
+ &svc->layer_context[svc->spatial_layer_id].twopass : &cpi->twopass;
+
+ return (!frame_is_intra_only(&cpi->common) &&
+ twopass->stats_in - 2 > twopass->stats_in_start &&
+ twopass->stats_in < twopass->stats_in_end &&
+ (twopass->stats_in - 1)->pcnt_inter - (twopass->stats_in - 1)->pcnt_motion
+ == 1 &&
+ (twopass->stats_in - 2)->pcnt_inter - (twopass->stats_in - 2)->pcnt_motion
+ == 1 &&
+ twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1);
+}
void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
@@ -2329,11 +2355,6 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
rc->base_frame_target = target_rate;
- // Correction to rate target based on prior over or under shoot.
- if (cpi->oxcf.rc_mode == VPX_VBR)
- vbr_rate_correction(cpi, &target_rate, rc->vbr_bits_off_target);
-
- vp9_rc_set_frame_target(cpi, target_rate);
cm->frame_type = INTER_FRAME;
if (lc != NULL) {
@@ -2347,6 +2368,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
}
}
+ // Do the firstpass stats indicate that this frame is skippable for the
+ // partition search?
+ if (cpi->sf.allow_partition_search_skip &&
+ cpi->oxcf.pass == 2 && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
+ }
+
return;
}
@@ -2377,8 +2405,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
this_frame_copy = this_frame;
// Keyframe and section processing.
- if (rc->frames_to_key == 0 ||
- (cpi->frame_flags & FRAMEFLAGS_KEY)) {
+ if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
// Define next KF group and assign bits to it.
find_next_key_frame(cpi, &this_frame_copy);
} else {
@@ -2431,6 +2458,13 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
configure_buffer_updates(cpi);
+ // Do the firstpass stats indicate that this frame is skippable for the
+ // partition search?
+ if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&
+ (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
+ }
+
target_rate = gf_group->bit_allocation[gf_group->index];
if (cpi->common.frame_type == KEY_FRAME)
target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
@@ -2439,18 +2473,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
rc->base_frame_target = target_rate;
- // Correction to rate target based on prior over or under shoot.
- if (cpi->oxcf.rc_mode == VPX_VBR)
- vbr_rate_correction(cpi, &target_rate, rc->vbr_bits_off_target);
-
- vp9_rc_set_frame_target(cpi, target_rate);
-
// Update the total stats remaining structure.
subtract_stats(&twopass->total_left_stats, &this_frame);
}
#define MINQ_ADJ_LIMIT 32
-#define Q_LIMIT_STEP 1
void vp9_twopass_postencode_update(VP9_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2495,16 +2522,22 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) {
--twopass->extend_maxq;
if (rc->rolling_target_bits >= rc->rolling_actual_bits)
- twopass->extend_minq += Q_LIMIT_STEP;
+ ++twopass->extend_minq;
// Overshoot.
} else if (rc->rate_error_estimate < -cpi->oxcf.over_shoot_pct) {
--twopass->extend_minq;
if (rc->rolling_target_bits < rc->rolling_actual_bits)
- twopass->extend_maxq += Q_LIMIT_STEP;
+ ++twopass->extend_maxq;
} else {
+ // Adjustment for extreme local overshoot.
+ if (rc->projected_frame_size > (2 * rc->base_frame_target) &&
+ rc->projected_frame_size > (2 * rc->avg_frame_bandwidth))
+ ++twopass->extend_maxq;
+
+ // Unwind undershoot or overshoot adjustment.
if (rc->rolling_target_bits < rc->rolling_actual_bits)
--twopass->extend_minq;
- if (rc->rolling_target_bits > rc->rolling_actual_bits)
+ else if (rc->rolling_target_bits > rc->rolling_actual_bits)
--twopass->extend_maxq;
}
twopass->extend_minq = clamp(twopass->extend_minq, 0, MINQ_ADJ_LIMIT);
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index ae9ed66cd..28f12916e 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -452,7 +452,8 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
args->dist += dist;
}
-static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][INTER_MODES] = {
+static const THR_MODES mode_idx[MAX_REF_FRAMES][4] = {
+ {THR_DC, THR_H_PRED, THR_V_PRED},
{THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV},
{THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG},
{THR_NEARESTA, THR_NEARA, THR_ZEROA, THR_NEWA},
@@ -461,13 +462,11 @@ static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][INTER_MODES] = {
// TODO(jingning) placeholder for inter-frame non-RD mode decision.
// this needs various further optimizations. to be continued..
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
- const TileInfo *const tile,
- int mi_row, int mi_col,
- int *returnrate,
- int64_t *returndistortion,
- BLOCK_SIZE bsize,
- PICK_MODE_CONTEXT *ctx) {
+ TileDataEnc *tile_data,
+ int mi_row, int mi_col, RD_COST *rd_cost,
+ BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -480,11 +479,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
- int64_t best_rd = INT64_MAX;
- int64_t this_rd = INT64_MAX;
+ RD_COST this_rdc, best_rdc;
uint8_t skip_txfm = 0;
- int rate = INT_MAX;
- int64_t dist = INT64_MAX;
// var_y and sse_y are saved to be used in skipping checking
unsigned int var_y = UINT_MAX;
unsigned int sse_y = UINT_MAX;
@@ -500,8 +496,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
const int8_t segment_id = mbmi->segment_id;
const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
- const int *const rd_thresh_freq_fact = cpi->rd.thresh_freq_fact[bsize];
- INTERP_FILTER filter_ref = cm->interp_filter;
+ const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
+ INTERP_FILTER filter_ref;
const int bsl = mi_width_log2_lookup[bsize];
const int pred_filter_search = cm->interp_filter == SWITCHABLE ?
(((mi_row + mi_col) >> bsl) +
@@ -544,9 +540,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
x->skip = 0;
+ if (xd->up_available)
+ filter_ref = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter;
+ else if (xd->left_available)
+ filter_ref = xd->mi[-1].src_mi->mbmi.interp_filter;
+ else
+ filter_ref = cm->interp_filter;
+
// initialize mode decisions
- *returnrate = INT_MAX;
- *returndistortion = INT64_MAX;
+ vp9_rd_cost_reset(&best_rdc);
+ vp9_rd_cost_reset(&this_rdc);
+ vp9_rd_cost_reset(rd_cost);
vpx_memset(mbmi, 0, sizeof(MB_MODE_INFO));
mbmi->sb_type = bsize;
mbmi->ref_frame[0] = NONE;
@@ -557,17 +561,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
EIGHTTAP : cm->interp_filter;
mbmi->segment_id = segment_id;
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
PREDICTION_MODE this_mode;
x->pred_mv_sad[ref_frame] = INT_MAX;
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
frame_mv[ZEROMV][ref_frame].as_int = 0;
- if (xd->up_available)
- filter_ref = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter;
- else if (xd->left_available)
- filter_ref = xd->mi[-1].src_mi->mbmi.interp_filter;
-
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
int_mv *const candidates = mbmi->ref_mvs[ref_frame];
@@ -576,10 +575,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
sf, sf);
if (!cm->error_resilient_mode)
- vp9_find_mv_refs(cm, xd, tile, xd->mi[0].src_mi, ref_frame,
+ vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame,
candidates, mi_row, mi_col);
else
- const_motion[ref_frame] = mv_refs_rt(cm, xd, tile, xd->mi[0].src_mi,
+ const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info,
+ xd->mi[0].src_mi,
ref_frame, candidates,
mi_row, mi_col);
@@ -606,27 +606,27 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int rate_mv = 0;
int mode_rd_thresh;
- if (const_motion[ref_frame] &&
- (this_mode == NEARMV || this_mode == ZEROMV))
+ if (const_motion[ref_frame] && this_mode == NEARMV)
continue;
if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
continue;
mode_rd_thresh =
- rd_threshes[mode_idx[ref_frame -
- LAST_FRAME][INTER_OFFSET(this_mode)]];
- if (rd_less_than_thresh(best_rd, mode_rd_thresh,
+ rd_threshes[mode_idx[ref_frame][INTER_OFFSET(this_mode)]];
+ if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
rd_thresh_freq_fact[this_mode]))
continue;
if (this_mode == NEWMV) {
+ if (ref_frame > LAST_FRAME)
+ continue;
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
- this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
+ this_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
&frame_mv[NEWMV][ref_frame],
- &rate_mv, best_rd))
+ &rate_mv, best_rdc.rdcost))
continue;
}
@@ -642,7 +642,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// motion vector is at sub-pixel accuracy level for luma component, i.e.,
// the last three bits are all zeros.
if (cpi->sf.reuse_inter_pred_sby) {
- if (this_mode == NEARESTMV) {
+ if (!this_mode_pred) {
this_mode_pred = &tmp[3];
} else {
this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
@@ -699,30 +699,34 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->interp_filter = best_filter;
mbmi->tx_size = pf_tx_size[mbmi->interp_filter];
- rate = pf_rate[mbmi->interp_filter];
- dist = pf_dist[mbmi->interp_filter];
+ this_rdc.rate = pf_rate[mbmi->interp_filter];
+ this_rdc.dist = pf_dist[mbmi->interp_filter];
var_y = pf_var[mbmi->interp_filter];
sse_y = pf_sse[mbmi->interp_filter];
x->skip_txfm[0] = skip_txfm;
} else {
mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP: filter_ref;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
- model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
+ &var_y, &sse_y);
}
- rate += rate_mv;
- rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
+ this_rdc.rate += rate_mv;
+ this_rdc.rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
[INTER_OFFSET(this_mode)];
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ this_rdc.rate, this_rdc.dist);
// Skipping checking: test to see if this block can be reconstructed by
// prediction only.
if (cpi->allow_encode_breakout) {
encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame,
- this_mode, var_y, sse_y, yv12_mb, &rate, &dist);
+ this_mode, var_y, sse_y, yv12_mb,
+ &this_rdc.rate, &this_rdc.dist);
if (x->skip) {
- rate += rate_mv;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
+ this_rdc.rate += rate_mv;
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ this_rdc.rate, this_rdc.dist);
}
}
@@ -734,10 +738,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
(void)ctx;
#endif
- if (this_rd < best_rd || x->skip) {
- best_rd = this_rd;
- *returnrate = rate;
- *returndistortion = dist;
+ if (this_rdc.rdcost < best_rdc.rdcost || x->skip) {
+ best_rdc = this_rdc;
best_mode = this_mode;
best_pred_filter = mbmi->interp_filter;
best_tx_size = mbmi->tx_size;
@@ -757,10 +759,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (x->skip)
break;
}
- // If the current reference frame is valid and we found a usable mode,
- // we are done.
- if (best_rd < INT64_MAX)
- break;
+
+ // Check that a prediction mode has been selected.
+ assert(best_rdc.rdcost < INT64_MAX);
}
// If best prediction is not in dst buf, then copy the prediction block from
@@ -792,7 +793,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
- if (!x->skip && best_rd > inter_mode_thresh &&
+ if (!x->skip && best_rdc.rdcost > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize) {
PREDICTION_MODE this_mode;
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 0, 0 };
@@ -814,16 +815,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
vp9_foreach_transformed_block_in_plane(xd, bsize, 0,
estimate_block_intra, &args);
mbmi->tx_size = saved_tx_size;
- rate = args.rate;
- dist = args.dist;
- rate += cpi->mbmode_cost[this_mode];
- rate += intra_cost_penalty;
- this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist);
-
- if (this_rd + intra_mode_cost < best_rd) {
- best_rd = this_rd;
- *returnrate = rate;
- *returndistortion = dist;
+ this_rdc.rate = args.rate;
+ this_rdc.dist = args.dist;
+ this_rdc.rate += cpi->mbmode_cost[this_mode];
+ this_rdc.rate += intra_cost_penalty;
+ this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
+ this_rdc.rate, this_rdc.dist);
+
+ if (this_rdc.rdcost + intra_mode_cost < best_rdc.rdcost) {
+ best_rdc = this_rdc;
mbmi->mode = this_mode;
mbmi->tx_size = intra_tx_size;
mbmi->ref_frame[0] = INTRA_FRAME;
@@ -836,4 +836,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->sf.reuse_inter_pred_sby)
pd->dst = orig_dst;
}
+
+ if (is_inter_block(mbmi))
+ vp9_update_rd_thresh_fact(cpi, tile_data, bsize,
+ mode_idx[ref_frame][INTER_OFFSET(mbmi->mode)]);
+ else
+ vp9_update_rd_thresh_fact(cpi, tile_data, bsize,
+ mode_idx[ref_frame][mbmi->mode]);
+
+ *rd_cost = best_rdc;
}
diff --git a/vp9/encoder/vp9_pickmode.h b/vp9/encoder/vp9_pickmode.h
index 97aeca76a..23d347d94 100644
--- a/vp9/encoder/vp9_pickmode.h
+++ b/vp9/encoder/vp9_pickmode.h
@@ -18,10 +18,8 @@ extern "C" {
#endif
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
- const struct TileInfo *const tile,
- int mi_row, int mi_col,
- int *returnrate,
- int64_t *returndistortion,
+ TileDataEnc *tile_data,
+ int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx);
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 65bca669a..8a5b6114c 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -196,6 +196,7 @@ static int estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
const RATE_CONTROL *rc = &cpi->rc;
+ const VP9EncoderConfig *oxcf = &cpi->oxcf;
const int min_frame_target = MAX(rc->min_frame_bandwidth,
rc->avg_frame_bandwidth >> 5);
if (target < min_frame_target)
@@ -210,6 +211,11 @@ int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) {
// Clip the frame target to the maximum allowed value.
if (target > rc->max_frame_bandwidth)
target = rc->max_frame_bandwidth;
+ if (oxcf->rc_max_inter_bitrate_pct) {
+ const int max_rate = rc->avg_frame_bandwidth *
+ oxcf->rc_max_inter_bitrate_pct / 100;
+ target = MIN(target, max_rate);
+ }
return target;
}
@@ -971,7 +977,13 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
} else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+
+ // Modify best quality for second level arfs. For mode VPX_Q this
+ // becomes the baseline frame q.
+ if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
+ active_best_quality = (active_best_quality + cq_level + 1) / 2;
}
} else {
active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
@@ -1327,7 +1339,18 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
const int64_t diff = rc->optimal_buffer_level - rc->buffer_level;
const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100;
int min_frame_target = MAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
- int target = rc->avg_frame_bandwidth;
+ int target;
+
+ if (oxcf->gf_cbr_boost_pct) {
+ const int af_ratio_pct = oxcf->gf_cbr_boost_pct + 100;
+ target = cpi->refresh_golden_frame ?
+ (rc->avg_frame_bandwidth * rc->baseline_gf_interval * af_ratio_pct) /
+ (rc->baseline_gf_interval * 100 + af_ratio_pct - 100) :
+ (rc->avg_frame_bandwidth * rc->baseline_gf_interval * 100) /
+ (rc->baseline_gf_interval * 100 + af_ratio_pct - 100);
+ } else {
+ target = rc->avg_frame_bandwidth;
+ }
if (svc->number_temporal_layers > 1 &&
oxcf->rc_mode == VPX_CBR) {
// Note that for layers, avg_frame_bandwidth is the cumulative
@@ -1347,6 +1370,11 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
const int pct_high = (int)MIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
target += (target * pct_high) / 200;
}
+ if (oxcf->rc_max_inter_bitrate_pct) {
+ const int max_rate = rc->avg_frame_bandwidth *
+ oxcf->rc_max_inter_bitrate_pct / 100;
+ target = MIN(target, max_rate);
+ }
return MAX(min_frame_target, target);
}
@@ -1436,15 +1464,25 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
rc->frames_to_key = cpi->oxcf.key_freq;
rc->kf_boost = DEFAULT_KF_BOOST;
rc->source_alt_ref_active = 0;
- target = calc_iframe_target_size_one_pass_cbr(cpi);
} else {
cm->frame_type = INTER_FRAME;
- target = calc_pframe_target_size_one_pass_cbr(cpi);
}
+ if (rc->frames_till_gf_update_due == 0) {
+ rc->baseline_gf_interval = DEFAULT_GF_INTERVAL;
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ // NOTE: frames_till_gf_update_due must be <= frames_to_key.
+ if (rc->frames_till_gf_update_due > rc->frames_to_key)
+ rc->frames_till_gf_update_due = rc->frames_to_key;
+ cpi->refresh_golden_frame = 1;
+ rc->gfu_boost = DEFAULT_GF_BOOST;
+ }
+
+ if (cm->frame_type == KEY_FRAME)
+ target = calc_iframe_target_size_one_pass_cbr(cpi);
+ else
+ target = calc_pframe_target_size_one_pass_cbr(cpi);
+
vp9_rc_set_frame_target(cpi, target);
- // Don't use gf_update by default in CBR mode.
- rc->frames_till_gf_update_due = INT_MAX;
- rc->baseline_gf_interval = INT_MAX;
}
int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c
index 7f526fc42..13e317d6d 100644
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -65,7 +65,7 @@ static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
};
static void fill_mode_costs(VP9_COMP *cpi) {
- const FRAME_CONTEXT *const fc = &cpi->common.fc;
+ const FRAME_CONTEXT *const fc = cpi->common.fc;
int i, j;
for (i = 0; i < INTRA_MODES; ++i)
@@ -280,7 +280,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
set_block_thresholds(cm, rd);
if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
- fill_token_costs(x->token_costs, cm->fc.coef_probs);
+ fill_token_costs(x->token_costs, cm->fc->coef_probs);
for (i = 0; i < PARTITION_CONTEXTS; ++i)
vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i),
@@ -295,11 +295,11 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
vp9_build_nmv_cost_table(x->nmvjointcost,
cm->allow_high_precision_mv ? x->nmvcost_hp
: x->nmvcost,
- &cm->fc.nmvc, cm->allow_high_precision_mv);
+ &cm->fc->nmvc, cm->allow_high_precision_mv);
for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
- cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
+ cm->fc->inter_mode_probs[i], vp9_inter_mode_tree);
}
}
}
@@ -594,21 +594,38 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
const SPEED_FEATURES *const sf = &cpi->sf;
RD_OPT *const rd = &cpi->rd;
int i;
+ static const int thresh_mult[2][MAX_REFS] =
+ {{2500, 2500, 2500, 4500, 4500, 2500},
+ {2000, 2000, 2000, 4000, 4000, 2000}};
+
+ for (i = 0; i < MAX_REFS; ++i) {
+ rd->thresh_mult_sub8x8[i] =
+ (sf->disable_split_mask & (1 << i)) ?
+ INT_MAX : thresh_mult[cpi->oxcf.mode == BEST][i];
+ }
+}
- for (i = 0; i < MAX_REFS; ++i)
- rd->thresh_mult_sub8x8[i] = cpi->oxcf.mode == BEST ? -500 : 0;
-
- rd->thresh_mult_sub8x8[THR_LAST] += 2500;
- rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
- rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
- rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
- rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
- rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
-
- // Check for masked out split cases.
- for (i = 0; i < MAX_REFS; ++i)
- if (sf->disable_split_mask & (1 << i))
- rd->thresh_mult_sub8x8[i] = INT_MAX;
+// Adapts tile_data->thresh_freq_fact after a mode decision: the chosen mode's factor shrinks, all others grow. TODO(jingning) Refactor this function. Use targeted smaller struct as inputs.
+void vp9_update_rd_thresh_fact(VP9_COMP *cpi, TileDataEnc *tile_data,
+ int bsize, int best_mode_index) {
+ if (cpi->sf.adaptive_rd_thresh > 0) {  // No-op when the speed feature is 0 or negative.
+ const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;  // Sizes below BLOCK_8X8 walk MAX_REFS entries, others MAX_MODES.
+ int mode;
+ for (mode = 0; mode < top_mode; ++mode) {
+ const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);  // Lower end of the updated size window, clamped to BLOCK_4X4.
+ const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);  // Upper end of the updated size window, clamped to BLOCK_64X64.
+ BLOCK_SIZE bs;
+ for (bs = min_size; bs <= max_size; ++bs) {
+ int *const fact = &tile_data->thresh_freq_fact[bs][mode];
+ if (mode == best_mode_index) {
+ *fact -= (*fact >> 4);  // Selected mode: reduce its factor by 1/16 of its current value.
+ } else {
+ *fact = MIN(*fact + RD_THRESH_INC,
+ cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);  // Other modes: step up by RD_THRESH_INC, capped at adaptive_rd_thresh * RD_THRESH_MAX_FACT.
+ }
+ }
+ }
+ }
+}
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index 1aa52663a..aecca0b43 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -36,6 +36,9 @@ extern "C" {
#define MAX_MODES 30
#define MAX_REFS 6
+#define RD_THRESH_MAX_FACT 64  // Multiplied by sf.adaptive_rd_thresh to cap a threshold factor in vp9_update_rd_thresh_fact().
+#define RD_THRESH_INC 1  // Amount added to the factor of each non-selected mode per update.
+
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
typedef enum {
@@ -129,6 +132,7 @@ void vp9_rd_cost_reset(RD_COST *rd_cost);
void vp9_rd_cost_init(RD_COST *rd_cost);
struct TileInfo;
+struct TileDataEnc;
struct VP9_COMP;
struct macroblock;
@@ -158,6 +162,10 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);
void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
+void vp9_update_rd_thresh_fact(struct VP9_COMP *cpi,  // Reads cpi->sf.adaptive_rd_thresh; does nothing unless it is > 0.
+ struct TileDataEnc *tile_data,  // Its thresh_freq_fact table is updated in place.
+ int bsize, int best_mode_index);  // best_mode_index: entry whose factor is decreased; all other entries are increased.
+
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
int thresh_fact) {
return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index eca8e5880..e80f345e8 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -38,9 +38,6 @@
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_variance.h"
-#define RD_THRESH_MAX_FACT 64
-#define RD_THRESH_INC 1
-
#define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
(1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
@@ -478,7 +475,8 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
return;
if (!is_inter_block(mbmi)) {
- vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
+ struct encode_b_args arg = {x, NULL, &mbmi->skip};
+ vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
dist_block(plane, block, tx_size, args, xd->bd);
@@ -639,7 +637,7 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
int64_t best_rd = INT64_MAX;
TX_SIZE best_tx = max_tx_size;
- const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
+ const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
@@ -2765,35 +2763,15 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
}
-static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
- int best_mode_index) {
- if (cpi->sf.adaptive_rd_thresh > 0) {
- const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
- int mode;
- for (mode = 0; mode < top_mode; ++mode) {
- const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4);
- const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64);
- BLOCK_SIZE bs;
- for (bs = min_size; bs <= max_size; ++bs) {
- int *const fact = &cpi->rd.thresh_freq_fact[bs][mode];
- if (mode == best_mode_index) {
- *fact -= (*fact >> 4);
- } else {
- *fact = MIN(*fact + RD_THRESH_INC,
- cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
- }
- }
- }
- }
-}
-
-void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
- const TileInfo *const tile,
+void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
+ MACROBLOCK *x,
int mi_row, int mi_col,
RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
RD_OPT *const rd_opt = &cpi->rd;
SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -2836,9 +2814,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
int mode_skip_start = sf->mode_skip_start + 1;
const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
- const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
+ const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
int64_t mode_threshold[MAX_MODES];
- int *mode_map = rd_opt->mode_map[bsize];
+ int *mode_map = tile_data->mode_map[bsize];
const int mode_search_skip_flags = sf->mode_search_skip_flags;
vp9_zero(best_mbmode);
@@ -2869,7 +2847,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- setup_buffer_inter(cpi, x, tile, ref_frame, bsize, mi_row, mi_col,
+ setup_buffer_inter(cpi, x, tile_info, ref_frame, bsize, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
}
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
@@ -3023,9 +3001,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
if (sf->motion_field_mode_search) {
const int mi_width = MIN(num_8x8_blocks_wide_lookup[bsize],
- tile->mi_col_end - mi_col);
+ tile_info->mi_col_end - mi_col);
const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],
- tile->mi_row_end - mi_row);
+ tile_info->mi_row_end - mi_row);
const int bsl = mi_width_log2_lookup[bsize];
int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
+ get_chessboard_index(cm->current_video_frame)) & 0x1;
@@ -3036,7 +3014,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
int_mv ref_mv;
ref_mv.as_int = INVALID_MV;
- if ((mi_row - 1) >= tile->mi_row_start) {
+ if ((mi_row - 1) >= tile_info->mi_row_start) {
ref_mv = xd->mi[-xd->mi_stride].src_mi->mbmi.mv[0];
rf = xd->mi[-xd->mi_stride].src_mi->mbmi.ref_frame[0];
for (i = 0; i < mi_width; ++i) {
@@ -3047,7 +3025,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
}
}
- if ((mi_col - 1) >= tile->mi_col_start) {
+ if ((mi_col - 1) >= tile_info->mi_col_start) {
if (ref_mv.as_int == INVALID_MV)
ref_mv = xd->mi[-1].src_mi->mbmi.mv[0];
if (rf == NONE)
@@ -3420,7 +3398,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
!is_inter_block(&best_mbmode));
if (!cpi->rc.is_src_frame_alt_ref)
- update_rd_thresh_fact(cpi, bsize, best_mode_index);
+ vp9_update_rd_thresh_fact(cpi, tile_data, bsize, best_mode_index);
// macroblock modes
*mbmi = best_mbmode;
@@ -3479,7 +3457,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_tx_diff, best_filter_diff, best_mode_skippable);
}
-void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
+void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
+ MACROBLOCK *x,
RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
@@ -3573,7 +3553,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
assert((cm->interp_filter == SWITCHABLE) ||
(cm->interp_filter == mbmi->interp_filter));
- update_rd_thresh_fact(cpi, bsize, THR_ZEROMV);
+ vp9_update_rd_thresh_fact(cpi, tile_data, bsize, THR_ZEROMV);
vp9_zero(best_pred_diff);
vp9_zero(best_filter_diff);
@@ -3585,14 +3565,16 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
best_pred_diff, best_tx_diff, best_filter_diff, 0);
}
-void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
- const TileInfo *const tile,
+void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
+ TileDataEnc *tile_data,
+ MACROBLOCK *x,
int mi_row, int mi_col,
RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far) {
VP9_COMMON *const cm = &cpi->common;
+ TileInfo *const tile_info = &tile_data->tile_info;
RD_OPT *const rd_opt = &cpi->rd;
SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -3651,10 +3633,10 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
if (cpi->ref_frame_flags & flag_list[ref_frame]) {
- setup_buffer_inter(cpi, x, tile,
- ref_frame, bsize, mi_row, mi_col,
- frame_mv[NEARESTMV], frame_mv[NEARMV],
- yv12_mb);
+ setup_buffer_inter(cpi, x, tile_info,
+ ref_frame, bsize, mi_row, mi_col,
+ frame_mv[NEARESTMV], frame_mv[NEARMV],
+ yv12_mb);
} else {
ref_frame_skip_mask[0] |= (1 << ref_frame);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
@@ -3712,7 +3694,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// Test best rd so far against threshold for trying this mode.
if (rd_less_than_thresh(best_rd,
rd_opt->threshes[segment_id][bsize][ref_index],
- rd_opt->thresh_freq_fact[bsize][ref_index]))
+ tile_data->thresh_freq_fact[bsize][ref_index]))
continue;
comp_pred = second_ref_frame > INTRA_FRAME;
@@ -3845,7 +3827,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
int newbest, rs;
int64_t rs_rd;
mbmi->interp_filter = switchable_filter_index;
- tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
+ tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile_info,
&mbmi->ref_mvs[ref_frame][0],
second_ref, best_yrd, &rate,
&rate_y, &distortion,
@@ -3911,7 +3893,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
if (!pred_exists) {
// Handles the special case when a filter that is not in the
// switchable list (bilinear, 6-tap) is indicated at the frame level
- tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
+ tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile_info,
&mbmi->ref_mvs[ref_frame][0],
second_ref, best_yrd, &rate, &rate_y,
&distortion, &skippable, &total_sse,
@@ -4146,7 +4128,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
(cm->interp_filter == best_mbmode.interp_filter) ||
!is_inter_block(&best_mbmode));
- update_rd_thresh_fact(cpi, bsize, best_ref_index);
+ vp9_update_rd_thresh_fact(cpi, tile_data, bsize, best_ref_index);
// macroblock modes
*mbmi = best_mbmode;
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index ed38ce81a..7bbc3c89a 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -29,14 +29,16 @@ void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x,
struct RD_COST *rd_cost, BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx, int64_t best_rd);
-void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, struct macroblock *x,
- const struct TileInfo *const tile,
+void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi,
+ struct TileDataEnc *tile_data,
+ struct macroblock *x,
int mi_row, int mi_col,
struct RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd_so_far);
void vp9_rd_pick_inter_mode_sb_seg_skip(struct VP9_COMP *cpi,
+ struct TileDataEnc *tile_data,
struct macroblock *x,
struct RD_COST *rd_cost,
BLOCK_SIZE bsize,
@@ -44,8 +46,8 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(struct VP9_COMP *cpi,
int64_t best_rd_so_far);
void vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi,
+ struct TileDataEnc *tile_data,
struct macroblock *x,
- const struct TileInfo *const tile,
int mi_row, int mi_col,
struct RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 9e3ee2c94..3315aa6a1 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -142,8 +142,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
if (speed >= 5) {
int i;
-
- sf->partition_search_type = FIXED_PARTITION;
sf->optimize_coefficients = 0;
sf->mv.search_method = HEX;
sf->disable_filter_search_var_thresh = 500;
@@ -151,8 +149,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->intra_y_mode_mask[i] = INTRA_DC;
sf->intra_uv_mode_mask[i] = INTRA_DC;
}
- }
- if (speed >= 6) {
+ sf->partition_search_breakout_rate_thr = 500;
sf->mv.reduce_first_step_size = 1;
}
}
@@ -205,7 +202,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
@@ -217,8 +213,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
if (speed >= 3) {
sf->use_square_partition_only = 1;
sf->disable_filter_search_var_thresh = 100;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_ALL;
- sf->constrain_copy_partition = 1;
sf->use_uv_intra_rd_estimate = 1;
sf->skip_encode_sb = 1;
sf->mv.subpel_iters_per_step = 1;
@@ -263,8 +257,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
: STRICT_NEIGHBORING_MIN_MAX;
sf->max_partition_size = BLOCK_32X32;
sf->min_partition_size = BLOCK_8X8;
- sf->partition_check =
- (frames_since_key % sf->last_partitioning_redo_frequency == 1);
sf->force_frame_boost = is_keyframe ||
(frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = is_keyframe ? 20 : 15;
@@ -275,6 +267,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
+ sf->adaptive_rd_thresh = 2;
+ // This feature is only enabled when partition search is disabled.
+ sf->reuse_inter_pred_sby = 1;
if (MIN(cm->width, cm->height) >= 720)
sf->partition_search_breakout_dist_thr = (1 << 25);
@@ -295,15 +290,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->partition_search_type = VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->mv.search_method = NSTEP;
-
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
-
- // This feature is only enabled when partition search is disabled.
- sf->reuse_inter_pred_sby = 1;
-
- // Increase mode checking threshold for NEWMV.
- sf->elevate_newmv_thresh = 1000;
-
sf->mv.reduce_first_step_size = 1;
}
@@ -348,7 +335,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->mv.fullpel_search_step_param = 6;
sf->comp_inter_joint_search_thresh = BLOCK_4X4;
sf->adaptive_rd_thresh = 0;
- sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_OFF;
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;
@@ -368,7 +354,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->min_partition_size = BLOCK_4X4;
sf->adjust_partitioning_from_last_frame = 0;
sf->last_partitioning_redo_frequency = 4;
- sf->constrain_copy_partition = 0;
sf->disable_split_mask = 0;
sf->mode_search_skip_flags = 0;
sf->force_frame_boost = 0;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 951b4af22..a314f6040 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -93,12 +93,6 @@ typedef enum {
} MOTION_THRESHOLD;
typedef enum {
- LAST_FRAME_PARTITION_OFF = 0,
- LAST_FRAME_PARTITION_LOW_MOTION = 1,
- LAST_FRAME_PARTITION_ALL = 2
-} LAST_FRAME_PARTITION_METHOD;
-
-typedef enum {
USE_FULL_RD = 0,
USE_LARGESTALL,
USE_TX_8X8
@@ -242,15 +236,6 @@ typedef struct SPEED_FEATURES {
// level within a frame.
int allow_skip_recode;
- // This variable allows us to reuse the last frames partition choices
- // (64x64 v 32x32 etc) for this frame. It can be set to only use the last
- // frame as a starting point in low motion scenes or always use it. If set
- // we use last partitioning_redo frequency to determine how often to redo
- // the partitioning from scratch. Adjust_partitioning_from_last_frame
- // enables us to adjust up or down one partitioning from the last frames
- // partitioning.
- LAST_FRAME_PARTITION_METHOD use_lastframe_partitioning;
-
// The threshold is to determine how slow the motion is, it is used when
// use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION
MOTION_THRESHOLD lf_motion_threshold;
@@ -264,8 +249,6 @@ typedef struct SPEED_FEATURES {
// precise but significantly faster than the non lp version.
int use_lp32x32fdct;
- // TODO(JBB): remove this as its no longer used.
-
// After looking at the first set of modes (set by index here), skip
// checking modes for reference frames that don't match the reference frame
// of the best so far.
@@ -303,12 +286,6 @@ typedef struct SPEED_FEATURES {
// use_lastframe_partitioning is set.
int last_partitioning_redo_frequency;
- // This enables constrained copy partitioning, which, given an input block
- // size bsize, will copy previous partition for partitions less than bsize,
- // otherwise bsize partition is used. bsize is currently set to 16x16.
- // Used for the case where motion is detected in superblock.
- int constrain_copy_partition;
-
// Disables sub 8x8 blocksizes in different scenarios: Choices are to disable
// it always, to allow it for only Last frame and Intra, disable it for all
// inter modes or to enable it always.
@@ -342,10 +319,6 @@ typedef struct SPEED_FEATURES {
// Fast quantization process path
int use_quant_fp;
- // Search through variable block partition types in non-RD mode decision
- // encoding process for RTC.
- int partition_check;
-
// Use finer quantizer in every other few frames that run variable block
// partition type search.
int force_frame_boost;
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index adf01bf35..0166a50a0 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -313,7 +313,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] =
cpi->coef_counts[tx_size][type][ref];
vp9_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
- cpi->common.fc.coef_probs[tx_size][type][ref];
+ cpi->common.fc->coef_probs[tx_size][type][ref];
unsigned int (*const eob_branch)[COEFF_CONTEXTS] =
cpi->common.counts.eob_branch[tx_size][type][ref];
const uint8_t *const band = get_band_translate(tx_size);
diff --git a/vp9/encoder/x86/vp9_denoiser_sse2.c b/vp9/encoder/x86/vp9_denoiser_sse2.c
index bf5fa889f..4ddee7b74 100644
--- a/vp9/encoder/x86/vp9_denoiser_sse2.c
+++ b/vp9/encoder/x86/vp9_denoiser_sse2.c
@@ -23,18 +23,17 @@
// Compute the sum of all pixel differences of this MB.
static INLINE int sum_diff_16x1(__m128i acc_diff) {
const __m128i k_1 = _mm_set1_epi16(1);
- const __m128i acc_diff_lo = _mm_srai_epi16(
- _mm_unpacklo_epi8(acc_diff, acc_diff), 8);
- const __m128i acc_diff_hi = _mm_srai_epi16(
- _mm_unpackhi_epi8(acc_diff, acc_diff), 8);
+ const __m128i acc_diff_lo =
+ _mm_srai_epi16(_mm_unpacklo_epi8(acc_diff, acc_diff), 8);
+ const __m128i acc_diff_hi =
+ _mm_srai_epi16(_mm_unpackhi_epi8(acc_diff, acc_diff), 8);
const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi);
const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1);
- const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba,
- _mm_srli_si128(hg_fe_dc_ba, 8));
- const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba,
- _mm_srli_si128(hgfe_dcba, 4));
- int sum_diff = _mm_cvtsi128_si32(hgfedcba);
- return sum_diff;
+ const __m128i hgfe_dcba =
+ _mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8));
+ const __m128i hgfedcba =
+ _mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4));
+ return _mm_cvtsi128_si32(hgfedcba);
}
// Denoise a 16x1 vector.
@@ -51,8 +50,8 @@ static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
__m128i acc_diff) {
// Calculate differences
const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
- const __m128i v_mc_running_avg_y = _mm_loadu_si128(
- (const __m128i *)(&mc_running_avg_y[0]));
+ const __m128i v_mc_running_avg_y =
+ _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0]));
__m128i v_running_avg_y;
const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
@@ -60,8 +59,8 @@ static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, *k_0);
// Clamp absolute difference to 16 to be used to get mask. Doing this
// allows us to use _mm_cmpgt_epi8, which operates on signed byte.
- const __m128i clamped_absdiff = _mm_min_epu8(
- _mm_or_si128(pdiff, ndiff), *k_16);
+ const __m128i clamped_absdiff =
+ _mm_min_epu8(_mm_or_si128(pdiff, ndiff), *k_16);
// Get masks for l2 l1 and l0 adjustments.
const __m128i mask2 = _mm_cmpgt_epi8(*k_16, clamped_absdiff);
const __m128i mask1 = _mm_cmpgt_epi8(*k_8, clamped_absdiff);
@@ -95,24 +94,22 @@ static INLINE __m128i vp9_denoiser_16x1_sse2(const uint8_t *sig,
}
// Denoise a 16x1 vector with a weaker filter.
-static INLINE __m128i vp9_denoiser_adj_16x1_sse2(const uint8_t *sig,
- const uint8_t *mc_running_avg_y,
- uint8_t *running_avg_y,
- const __m128i k_0,
- const __m128i k_delta,
- __m128i acc_diff) {
+static INLINE __m128i vp9_denoiser_adj_16x1_sse2(
+ const uint8_t *sig, const uint8_t *mc_running_avg_y,
+ uint8_t *running_avg_y, const __m128i k_0,
+ const __m128i k_delta, __m128i acc_diff) {
__m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0]));
// Calculate differences.
const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0]));
const __m128i v_mc_running_avg_y =
- _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0]));
+ _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0]));
const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig);
const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y);
// Obtain the sign. FF if diff is negative.
const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0);
// Clamp absolute difference to delta to get the adjustment.
const __m128i adj =
- _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
+ _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta);
// Restore the sign and get positive and negative adjustments.
__m128i padj, nadj;
padj = _mm_andnot_si128(diff_sign, adj);
@@ -128,19 +125,16 @@ static INLINE __m128i vp9_denoiser_adj_16x1_sse2(const uint8_t *sig,
return acc_diff;
}
-static int vp9_denoiser_4xM_sse2(const uint8_t *sig, int sig_stride,
- const uint8_t *mc_running_avg_y,
- int mc_avg_y_stride,
- uint8_t *running_avg_y, int avg_y_stride,
- int increase_denoising,
- BLOCK_SIZE bs,
- int motion_magnitude) {
- int sum_diff_thresh;
- int r;
- int shift_inc = (increase_denoising &&
- motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
- unsigned char sig_buffer[2][16], mc_running_buffer[2][16],
- running_buffer[2][16];
+// Denoiser for 4xM and 8xM blocks.
+static int vp9_denoiser_NxM_sse2_small(
+ const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y,
+ int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride,
+ int increase_denoising, BLOCK_SIZE bs, int motion_magnitude, int width) {
+ int sum_diff_thresh, r, sum_diff = 0;
+ const int shift_inc = (increase_denoising &&
+ motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
+ 1 : 0;
+ uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16];
__m128i acc_diff = _mm_setzero_si128();
const __m128i k_0 = _mm_setzero_si128();
const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
@@ -148,145 +142,51 @@ static int vp9_denoiser_4xM_sse2(const uint8_t *sig, int sig_stride,
const __m128i k_16 = _mm_set1_epi8(16);
// Modify each level's adjustment according to motion_magnitude.
const __m128i l3 = _mm_set1_epi8(
- (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
- 7 + shift_inc : 6);
+ (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6);
// Difference between level 3 and level 2 is 2.
const __m128i l32 = _mm_set1_epi8(2);
// Difference between level 2 and level 1 is 1.
const __m128i l21 = _mm_set1_epi8(1);
- int sum_diff = 0;
-
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 2); ++r) {
- vpx_memcpy(sig_buffer[r], sig, 4);
- vpx_memcpy(sig_buffer[r] + 4, sig + sig_stride, 4);
- vpx_memcpy(sig_buffer[r] + 8, sig + sig_stride * 2, 4);
- vpx_memcpy(sig_buffer[r] + 12, sig + sig_stride * 3, 4);
- vpx_memcpy(mc_running_buffer[r], mc_running_avg_y, 4);
- vpx_memcpy(mc_running_buffer[r] + 4, mc_running_avg_y +
- mc_avg_y_stride, 4);
- vpx_memcpy(mc_running_buffer[r] + 8, mc_running_avg_y +
- mc_avg_y_stride * 2, 4);
- vpx_memcpy(mc_running_buffer[r] + 12, mc_running_avg_y +
- mc_avg_y_stride * 3, 4);
- vpx_memcpy(running_buffer[r], running_avg_y, 4);
- vpx_memcpy(running_buffer[r] + 4, running_avg_y +
- avg_y_stride, 4);
- vpx_memcpy(running_buffer[r] + 8, running_avg_y +
- avg_y_stride * 2, 4);
- vpx_memcpy(running_buffer[r] + 12, running_avg_y +
- avg_y_stride * 3, 4);
- acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
- mc_running_buffer[r],
- running_buffer[r],
- &k_0, &k_4, &k_8, &k_16,
- &l3, &l32, &l21, acc_diff);
- vpx_memcpy(running_avg_y, running_buffer[r], 4);
- vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 4, 4);
- vpx_memcpy(running_avg_y + avg_y_stride * 2,
- running_buffer[r] + 8, 4);
- vpx_memcpy(running_avg_y + avg_y_stride * 3,
- running_buffer[r] + 12, 4);
- // Update pointers for next iteration.
- sig += (sig_stride << 2);
- mc_running_avg_y += (mc_avg_y_stride << 2);
- running_avg_y += (avg_y_stride << 2);
- }
-
- {
- sum_diff = sum_diff_16x1(acc_diff);
- sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
- if (abs(sum_diff) > sum_diff_thresh) {
- // Before returning to copy the block (i.e., apply no denoising),
- // checK if we can still apply some (weaker) temporal filtering to
- // this block, that would otherwise not be denoised at all. Simplest
- // is to apply an additional adjustment to running_avg_y to bring it
- // closer to sig. The adjustment is capped by a maximum delta, and
- // chosen such that in most cases the resulting sum_diff will be
- // within the accceptable range given by sum_diff_thresh.
+ const uint8_t shift = (width == 4) ? 2 : 1;
- // The delta is set by the excess of absolute pixel diff over the
- // threshold.
- int delta = ((abs(sum_diff) - sum_diff_thresh)
- >> num_pels_log2_lookup[bs]) + 1;
- // Only apply the adjustment for max delta up to 3.
- if (delta < 4) {
- const __m128i k_delta = _mm_set1_epi8(delta);
- running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
- sum_diff = 0;
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 2); ++r) {
- acc_diff = vp9_denoiser_adj_16x1_sse2(
- sig_buffer[r], mc_running_buffer[r],
- running_buffer[r], k_0, k_delta,
- acc_diff);
- vpx_memcpy(running_avg_y, running_buffer[r], 4);
- vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 4, 4);
- vpx_memcpy(running_avg_y + avg_y_stride * 2,
- running_buffer[r] + 8, 4);
- vpx_memcpy(running_avg_y + avg_y_stride * 3,
- running_buffer[r] + 12, 4);
- // Update pointers for next iteration.
- running_avg_y += (avg_y_stride << 2);
- }
- sum_diff = sum_diff_16x1(acc_diff);
- if (abs(sum_diff) > sum_diff_thresh) {
- return COPY_BLOCK;
- }
- } else {
- return COPY_BLOCK;
- }
+ for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
+ vpx_memcpy(sig_buffer[r], sig, width);
+ vpx_memcpy(sig_buffer[r] + width, sig + sig_stride, width);
+ vpx_memcpy(mc_running_buffer[r], mc_running_avg_y, width);
+ vpx_memcpy(mc_running_buffer[r] + width,
+ mc_running_avg_y + mc_avg_y_stride, width);
+ vpx_memcpy(running_buffer[r], running_avg_y, width);
+ vpx_memcpy(running_buffer[r] + width,
+ running_avg_y + avg_y_stride, width);
+ if (width == 4) {
+ vpx_memcpy(sig_buffer[r] + width * 2, sig + sig_stride * 2, width);
+ vpx_memcpy(sig_buffer[r] + width * 3, sig + sig_stride * 3, width);
+ vpx_memcpy(mc_running_buffer[r] + width * 2,
+ mc_running_avg_y + mc_avg_y_stride * 2, width);
+ vpx_memcpy(mc_running_buffer[r] + width * 3,
+ mc_running_avg_y + mc_avg_y_stride * 3, width);
+ vpx_memcpy(running_buffer[r] + width * 2,
+ running_avg_y + avg_y_stride * 2, width);
+ vpx_memcpy(running_buffer[r] + width * 3,
+ running_avg_y + avg_y_stride * 3, width);
}
- }
- return FILTER_BLOCK;
-}
-
-static int vp9_denoiser_8xM_sse2(const uint8_t *sig, int sig_stride,
- const uint8_t *mc_running_avg_y,
- int mc_avg_y_stride,
- uint8_t *running_avg_y, int avg_y_stride,
- int increase_denoising,
- BLOCK_SIZE bs,
- int motion_magnitude) {
- int sum_diff_thresh;
- int r;
- int shift_inc = (increase_denoising &&
- motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
- unsigned char sig_buffer[8][16], mc_running_buffer[8][16],
- running_buffer[8][16];
- __m128i acc_diff = _mm_setzero_si128();
- const __m128i k_0 = _mm_setzero_si128();
- const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
- const __m128i k_8 = _mm_set1_epi8(8);
- const __m128i k_16 = _mm_set1_epi8(16);
- // Modify each level's adjustment according to motion_magnitude.
- const __m128i l3 = _mm_set1_epi8(
- (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
- 7 + shift_inc : 6);
- // Difference between level 3 and level 2 is 2.
- const __m128i l32 = _mm_set1_epi8(2);
- // Difference between level 2 and level 1 is 1.
- const __m128i l21 = _mm_set1_epi8(1);
- int sum_diff = 0;
-
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 1); ++r) {
- vpx_memcpy(sig_buffer[r], sig, 8);
- vpx_memcpy(sig_buffer[r] + 8, sig + sig_stride, 8);
- vpx_memcpy(mc_running_buffer[r], mc_running_avg_y, 8);
- vpx_memcpy(mc_running_buffer[r] + 8, mc_running_avg_y +
- mc_avg_y_stride, 8);
- vpx_memcpy(running_buffer[r], running_avg_y, 8);
- vpx_memcpy(running_buffer[r] + 8, running_avg_y +
- avg_y_stride, 8);
acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r],
mc_running_buffer[r],
running_buffer[r],
&k_0, &k_4, &k_8, &k_16,
&l3, &l32, &l21, acc_diff);
- vpx_memcpy(running_avg_y, running_buffer[r], 8);
- vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 8, 8);
+ vpx_memcpy(running_avg_y, running_buffer[r], width);
+ vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width);
+ if (width == 4) {
+ vpx_memcpy(running_avg_y + avg_y_stride * 2,
+ running_buffer[r] + width * 2, width);
+ vpx_memcpy(running_avg_y + avg_y_stride * 3,
+ running_buffer[r] + width * 3, width);
+ }
// Update pointers for next iteration.
- sig += (sig_stride << 1);
- mc_running_avg_y += (mc_avg_y_stride << 1);
- running_avg_y += (avg_y_stride << 1);
+ sig += (sig_stride << shift);
+ mc_running_avg_y += (mc_avg_y_stride << shift);
+ running_avg_y += (avg_y_stride << shift);
}
{
@@ -294,54 +194,61 @@ static int vp9_denoiser_8xM_sse2(const uint8_t *sig, int sig_stride,
sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
if (abs(sum_diff) > sum_diff_thresh) {
// Before returning to copy the block (i.e., apply no denoising),
- // checK if we can still apply some (weaker) temporal filtering to
+ // check if we can still apply some (weaker) temporal filtering to
// this block, that would otherwise not be denoised at all. Simplest
// is to apply an additional adjustment to running_avg_y to bring it
// closer to sig. The adjustment is capped by a maximum delta, and
// chosen such that in most cases the resulting sum_diff will be
- // within the accceptable range given by sum_diff_thresh.
+ // within the acceptable range given by sum_diff_thresh.
// The delta is set by the excess of absolute pixel diff over the
// threshold.
- int delta = ((abs(sum_diff) - sum_diff_thresh)
- >> num_pels_log2_lookup[bs]) + 1;
+ const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
+ num_pels_log2_lookup[bs]) + 1;
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
const __m128i k_delta = _mm_set1_epi8(delta);
running_avg_y -= avg_y_stride * (4 << b_height_log2_lookup[bs]);
- for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> 1); ++r) {
+ for (r = 0; r < ((4 << b_height_log2_lookup[bs]) >> shift); ++r) {
acc_diff = vp9_denoiser_adj_16x1_sse2(
- sig_buffer[r], mc_running_buffer[r],
- running_buffer[r], k_0, k_delta,
- acc_diff);
- vpx_memcpy(running_avg_y, running_buffer[r], 8);
- vpx_memcpy(running_avg_y + avg_y_stride, running_buffer[r] + 8, 8);
+ sig_buffer[r], mc_running_buffer[r], running_buffer[r],
+ k_0, k_delta, acc_diff);
+ vpx_memcpy(running_avg_y, running_buffer[r], width);
+ vpx_memcpy(running_avg_y + avg_y_stride,
+ running_buffer[r] + width, width);
+ if (width == 4) {
+ vpx_memcpy(running_avg_y + avg_y_stride * 2,
+ running_buffer[r] + width * 2, width);
+ vpx_memcpy(running_avg_y + avg_y_stride * 3,
+ running_buffer[r] + width * 3, width);
+ }
// Update pointers for next iteration.
- running_avg_y += (avg_y_stride << 1);
+ running_avg_y += (avg_y_stride << shift);
}
sum_diff = sum_diff_16x1(acc_diff);
if (abs(sum_diff) > sum_diff_thresh) {
return COPY_BLOCK;
}
} else {
- return COPY_BLOCK;
+ return COPY_BLOCK;
}
}
}
return FILTER_BLOCK;
}
-static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride,
- const uint8_t *mc_running_avg_y,
- int mc_avg_y_stride,
- uint8_t *running_avg_y,
- int avg_y_stride,
- int increase_denoising, BLOCK_SIZE bs,
- int motion_magnitude) {
- int sum_diff_thresh;
- int r, c;
- int shift_inc = (increase_denoising &&
- motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0;
+// Denoiser for 16xM, 32xM and 64xM blocks
+static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride,
+ const uint8_t *mc_running_avg_y,
+ int mc_avg_y_stride,
+ uint8_t *running_avg_y,
+ int avg_y_stride,
+ int increase_denoising, BLOCK_SIZE bs,
+ int motion_magnitude) {
+ int sum_diff_thresh, r, c, sum_diff = 0;
+ const int shift_inc = (increase_denoising &&
+ motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
+ 1 : 0;
__m128i acc_diff[4][4];
const __m128i k_0 = _mm_setzero_si128();
const __m128i k_4 = _mm_set1_epi8(4 + shift_inc);
@@ -349,13 +256,11 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride,
const __m128i k_16 = _mm_set1_epi8(16);
// Modify each level's adjustment according to motion_magnitude.
const __m128i l3 = _mm_set1_epi8(
- (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ?
- 7 + shift_inc : 6);
+ (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6);
// Difference between level 3 and level 2 is 2.
const __m128i l32 = _mm_set1_epi8(2);
// Difference between level 2 and level 1 is 1.
const __m128i l21 = _mm_set1_epi8(1);
- int sum_diff = 0;
for (c = 0; c < 4; ++c) {
for (r = 0; r < 4; ++r) {
@@ -363,13 +268,11 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride,
}
}
- for (r = 0; r < (4 << b_height_log2_lookup[bs]); r++) {
+ for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
acc_diff[c>>4][r>>4] = vp9_denoiser_16x1_sse2(
- sig, mc_running_avg_y,
- running_avg_y,
- &k_0, &k_4, &k_8, &k_16,
- &l3, &l32, &l21, acc_diff[c>>4][r>>4]);
+ sig, mc_running_avg_y, running_avg_y, &k_0, &k_4,
+ &k_8, &k_16, &l3, &l32, &l21, acc_diff[c>>4][r>>4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
@@ -385,8 +288,8 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride,
// Update pointers for next iteration.
sig = sig - 16 * ((4 << b_width_log2_lookup[bs]) >> 4) + sig_stride;
mc_running_avg_y = mc_running_avg_y -
- 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
- mc_avg_y_stride;
+ 16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
+ mc_avg_y_stride;
running_avg_y = running_avg_y -
16 * ((4 << b_width_log2_lookup[bs]) >> 4) +
avg_y_stride;
@@ -395,8 +298,8 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride,
{
sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising);
if (abs(sum_diff) > sum_diff_thresh) {
- int delta = ((abs(sum_diff) - sum_diff_thresh)
- >> num_pels_log2_lookup[bs]) + 1;
+ const int delta = ((abs(sum_diff) - sum_diff_thresh) >>
+ num_pels_log2_lookup[bs]) + 1;
// Only apply the adjustment for max delta up to 3.
if (delta < 4) {
@@ -408,9 +311,8 @@ static int vp9_denoiser_64_32_16xM_sse2(const uint8_t *sig, int sig_stride,
for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
for (c = 0; c < (4 << b_width_log2_lookup[bs]); c += 16) {
acc_diff[c>>4][r>>4] = vp9_denoiser_adj_16x1_sse2(
- sig, mc_running_avg_y,
- running_avg_y, k_0,
- k_delta, acc_diff[c>>4][r>>4]);
+ sig, mc_running_avg_y, running_avg_y, k_0,
+ k_delta, acc_diff[c>>4][r>>4]);
// Update pointers for next iteration.
sig += 16;
mc_running_avg_y += 16;
@@ -449,25 +351,25 @@ int vp9_denoiser_filter_sse2(const uint8_t *sig, int sig_stride,
BLOCK_SIZE bs,
int motion_magnitude) {
if (bs == BLOCK_4X4 || bs == BLOCK_4X8) {
- return vp9_denoiser_4xM_sse2(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude);
+ return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
+ mc_avg, mc_avg_stride,
+ avg, avg_stride,
+ increase_denoising,
+ bs, motion_magnitude, 4);
} else if (bs == BLOCK_8X4 || bs == BLOCK_8X8 || bs == BLOCK_8X16) {
- return vp9_denoiser_8xM_sse2(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude);
+ return vp9_denoiser_NxM_sse2_small(sig, sig_stride,
+ mc_avg, mc_avg_stride,
+ avg, avg_stride,
+ increase_denoising,
+ bs, motion_magnitude, 8);
} else if (bs == BLOCK_16X8 || bs == BLOCK_16X16 || bs == BLOCK_16X32 ||
bs == BLOCK_32X16|| bs == BLOCK_32X32 || bs == BLOCK_32X64 ||
bs == BLOCK_64X32 || bs == BLOCK_64X64) {
- return vp9_denoiser_64_32_16xM_sse2(sig, sig_stride,
- mc_avg, mc_avg_stride,
- avg, avg_stride,
- increase_denoising,
- bs, motion_magnitude);
+ return vp9_denoiser_NxM_sse2_big(sig, sig_stride,
+ mc_avg, mc_avg_stride,
+ avg, avg_stride,
+ increase_denoising,
+ bs, motion_magnitude);
} else {
return COPY_BLOCK;
}
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c
index adae18b48..d3c2a138c 100644
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -33,6 +33,8 @@ struct vp9_extracfg {
vp8e_tuning tuning;
unsigned int cq_level; // constrained quality level
unsigned int rc_max_intra_bitrate_pct;
+ unsigned int rc_max_inter_bitrate_pct;
+ unsigned int gf_cbr_boost_pct;
unsigned int lossless;
unsigned int frame_parallel_decoding_mode;
AQ_MODE aq_mode;
@@ -54,6 +56,8 @@ static struct vp9_extracfg default_extra_cfg = {
VP8_TUNE_PSNR, // tuning
10, // cq_level
0, // rc_max_intra_bitrate_pct
+ 0, // rc_max_inter_bitrate_pct
+ 0, // gf_cbr_boost_pct
0, // lossless
0, // frame_parallel_decoding_mode
NO_AQ, // aq_mode
@@ -380,6 +384,8 @@ static vpx_codec_err_t set_encoder_config(
// Convert target bandwidth from Kbit/s to Bit/s
oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate;
oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct;
+ oxcf->rc_max_inter_bitrate_pct = extra_cfg->rc_max_inter_bitrate_pct;
+ oxcf->gf_cbr_boost_pct = extra_cfg->gf_cbr_boost_pct;
oxcf->best_allowed_q =
extra_cfg->lossless ? 0 : vp9_quantizer_to_qindex(cfg->rc_min_quantizer);
@@ -649,6 +655,22 @@ static vpx_codec_err_t ctrl_set_rc_max_intra_bitrate_pct(
return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_rc_max_inter_bitrate_pct(
+ vpx_codec_alg_priv_t *ctx, va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.rc_max_inter_bitrate_pct =
+ CAST(VP8E_SET_MAX_INTER_BITRATE_PCT, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
+static vpx_codec_err_t ctrl_set_rc_gf_cbr_boost_pct(
+ vpx_codec_alg_priv_t *ctx, va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.gf_cbr_boost_pct =
+ CAST(VP8E_SET_GF_CBR_BOOST_PCT, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
static vpx_codec_err_t ctrl_set_lossless(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp9_extracfg extra_cfg = ctx->extra_cfg;
@@ -1266,6 +1288,8 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{VP8E_SET_TUNING, ctrl_set_tuning},
{VP8E_SET_CQ_LEVEL, ctrl_set_cq_level},
{VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_rc_max_intra_bitrate_pct},
+ {VP8E_SET_MAX_INTER_BITRATE_PCT, ctrl_set_rc_max_inter_bitrate_pct},
+ {VP8E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct},
{VP9E_SET_LOSSLESS, ctrl_set_lossless},
{VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode},
{VP9E_SET_AQ_MODE, ctrl_set_aq_mode},